Cookie validation logic

This commit is contained in:
HugeFrog24
2026-02-28 21:37:39 +01:00
parent 80444405e9
commit e3e14293cd
15 changed files with 802 additions and 398 deletions

View File

@@ -17,6 +17,8 @@ import re
import shutil
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Optional
import requests
from check_clashes import (
make_session,
@@ -25,32 +27,35 @@ from check_clashes import (
find_clashes,
build_download_paths,
fetch_sizes,
load_video_map,
is_valid_url,
VIDEO_MAP_FILE,
)
VIDEO_MAP_FILE = "video_map.json"
CHUNK_SIZE = 8 * 1024 * 1024
DEFAULT_OUTPUT = "downloads"
DEFAULT_WORKERS = 4
MODE_FILE = ".naming_mode"
MODE_ORIGINAL = "original"
MODE_TITLE = "title"
DEFAULT_OUTPUT: str = "downloads"
DEFAULT_WORKERS: int = 4
MODE_FILE: str = ".naming_mode"
MODE_ORIGINAL: str = "original"
MODE_TITLE: str = "title"
# ── Naming mode persistence ──────────────────────────────────────────
def read_mode(output_dir):
def read_mode(output_dir: str | Path) -> Optional[str]:
    """Return the naming mode saved in *output_dir*'s marker file, or None.

    The mode string is whatever ``write_mode`` stored in ``MODE_FILE``,
    with surrounding whitespace removed.
    """
    marker = Path(output_dir) / MODE_FILE
    if not marker.exists():
        return None
    return marker.read_text().strip()
def write_mode(output_dir, mode):
def write_mode(output_dir: str | Path, mode: str) -> None:
    """Persist *mode* as the saved naming scheme for *output_dir*.

    Creates the directory (and parents) if needed, then writes the mode
    string into the ``MODE_FILE`` marker inside it.
    """
    target = Path(output_dir)
    target.mkdir(parents=True, exist_ok=True)
    (target / MODE_FILE).write_text(mode)
def resolve_mode(args):
def resolve_mode(args: argparse.Namespace) -> str:
"""Determine naming mode from CLI flags + saved marker. Returns mode string."""
saved = read_mode(args.output)
@@ -69,13 +74,18 @@ def resolve_mode(args):
# ── Filename helpers ─────────────────────────────────────────────────
def sanitize_filename(title, max_len=180):
name = re.sub(r'[<>:"/\\|?*]', '', title)
name = re.sub(r'\s+', ' ', name).strip().rstrip('.')
def sanitize_filename(title: str, max_len: int = 180) -> str:
    """Turn *title* into a filesystem-safe filename stem.

    Removes characters forbidden on Windows, collapses whitespace runs to
    single spaces, and strips trailing dots/spaces.  Names longer than
    *max_len* are truncated.

    Fix: the original truncated with ``name[:max_len].rstrip()``, which only
    strips whitespace — a cut landing after a ``.`` reintroduced a trailing
    dot, invalid on Windows despite the earlier ``rstrip('.')``.
    """
    name = re.sub(r'[<>:"/\\|?*]', "", title)
    name = re.sub(r"\s+", " ", name).strip().rstrip(".")
    if len(name) > max_len:
        # Strip both spaces and dots after the cut, not just whitespace.
        name = name[:max_len].rstrip(" .")
    return name
def build_title_paths(urls, url_to_title, output_dir):
def build_title_paths(
urls: list[str],
url_to_title: dict[str, str],
output_dir: str | Path,
) -> dict[str, Path]:
name_to_urls = defaultdict(list)
url_to_base = {}
@@ -91,14 +101,19 @@ def build_title_paths(urls, url_to_title, output_dir):
base, ext = url_to_base[url]
full = base + ext
if len(name_to_urls[full]) > 1:
slug = url_to_filename(url).rsplit('.', 1)[0]
slug = url_to_filename(url).rsplit(".", 1)[0]
paths[url] = Path(output_dir) / f"{base} [{slug}]{ext}"
else:
paths[url] = Path(output_dir) / full
return paths
def get_paths_for_mode(mode, urls, video_map, output_dir):
def get_paths_for_mode(
mode: str,
urls: list[str],
video_map: dict[str, Any],
output_dir: str | Path,
) -> dict[str, Path]:
if mode == MODE_TITLE:
url_title = build_url_title_map(video_map)
return build_title_paths(urls, url_title, output_dir)
@@ -107,7 +122,14 @@ def get_paths_for_mode(mode, urls, video_map, output_dir):
# ── Reorganize ───────────────────────────────────────────────────────
def reorganize(urls, video_map, output_dir, target_mode, dry_run=False):
def reorganize(
urls: list[str],
video_map: dict[str, Any],
output_dir: str | Path,
target_mode: str,
dry_run: bool = False,
) -> None:
"""Rename existing files from one naming scheme to another."""
other_mode = MODE_TITLE if target_mode == MODE_ORIGINAL else MODE_ORIGINAL
old_paths = get_paths_for_mode(other_mode, urls, video_map, output_dir)
@@ -163,21 +185,27 @@ def reorganize(urls, video_map, output_dir, target_mode, dry_run=False):
# ── Download ─────────────────────────────────────────────────────────
def download_one(session, url, dest, expected_size):
def download_one(
session: requests.Session,
url: str,
dest: str | Path,
expected_size: Optional[int],
) -> tuple[str, int]:
dest = Path(dest)
part = dest.parent / (dest.name + ".part")
dest.parent.mkdir(parents=True, exist_ok=True)
if dest.exists():
local = dest.stat().st_size
if expected_size and local == expected_size:
if expected_size is not None and local == expected_size:
return "ok", 0
if expected_size and local != expected_size:
if expected_size is not None and local != expected_size:
dest.unlink()
existing = part.stat().st_size if part.exists() else 0
headers = {}
if existing and expected_size and existing < expected_size:
if existing and expected_size is not None and existing < expected_size:
headers["Range"] = f"bytes={existing}-"
try:
@@ -205,33 +233,21 @@ def download_one(session, url, dest, expected_size):
return f"error: {e}", written
final_size = existing + written
if expected_size and final_size != expected_size:
if expected_size is not None and final_size != expected_size:
return "size_mismatch", written
part.rename(dest)
return "ok", written
# ── Data loading ─────────────────────────────────────────────────────
def load_video_map():
    """Read VIDEO_MAP_FILE (UTF-8) and return its decoded JSON content."""
    raw = Path(VIDEO_MAP_FILE).read_text(encoding="utf-8")
    return json.loads(raw)
def _is_valid_url(url):
return url.startswith(
"http") and "<" not in url and ">" not in url and " href=" not in url
def collect_urls(video_map):
def collect_urls(video_map: dict[str, Any]) -> list[str]:
urls, seen, skipped = [], set(), 0
for entry in video_map.values():
for video_url in entry.get("videos", []):
if video_url in seen:
continue
seen.add(video_url)
if _is_valid_url(video_url):
if is_valid_url(video_url):
urls.append(video_url)
else:
skipped += 1
@@ -240,7 +256,7 @@ def collect_urls(video_map):
return urls
def build_url_title_map(video_map):
def build_url_title_map(video_map: dict[str, Any]) -> dict[str, str]:
url_title = {}
for entry in video_map.values():
title = entry.get("title", "")
@@ -252,24 +268,44 @@ def build_url_title_map(video_map):
# ── Main ─────────────────────────────────────────────────────────────
def main():
parser = argparse.ArgumentParser(
description="Download videos from video_map.json")
parser.add_argument("--output", "-o", default=DEFAULT_OUTPUT,
help=f"Download directory (default: {DEFAULT_OUTPUT})")
def main() -> None:
parser = argparse.ArgumentParser(description="Download videos from video_map.json")
parser.add_argument(
"--output",
"-o",
default=DEFAULT_OUTPUT,
help=f"Download directory (default: {DEFAULT_OUTPUT})",
)
naming = parser.add_mutually_exclusive_group()
naming.add_argument("--titles", "-t", action="store_true",
help="Use title-based filenames (saved as default for this directory)")
naming.add_argument("--original", action="store_true",
help="Use original CloudFront filenames (saved as default for this directory)")
naming.add_argument(
"--titles",
"-t",
action="store_true",
help="Use title-based filenames (saved as default for this directory)",
)
naming.add_argument(
"--original",
action="store_true",
help="Use original CloudFront filenames (saved as default for this directory)",
)
parser.add_argument("--reorganize", action="store_true",
help="Rename existing files to match the current naming mode")
parser.add_argument("--dry-run", "-n", action="store_true",
help="Preview without making changes")
parser.add_argument("--workers", "-w", type=int, default=DEFAULT_WORKERS,
help=f"Concurrent downloads (default: {DEFAULT_WORKERS})")
parser.add_argument(
"--reorganize",
action="store_true",
help="Rename existing files to match the current naming mode",
)
parser.add_argument(
"--dry-run", "-n", action="store_true", help="Preview without making changes"
)
parser.add_argument(
"--workers",
"-w",
type=int,
default=DEFAULT_WORKERS,
help=f"Concurrent downloads (default: {DEFAULT_WORKERS})",
)
args = parser.parse_args()
video_map = load_video_map()
@@ -287,7 +323,8 @@ def main():
if mode_changed and not args.reorganize:
print(f"\n[!] Mode changed from '{saved}' to '{mode}'.")
print(
" Use --reorganize to rename existing files, or --dry-run to preview.")
" Use --reorganize to rename existing files, or --dry-run to preview."
)
print(" Refusing to download until existing files are reorganized.")
return
reorganize(urls, video_map, args.output, mode, dry_run=args.dry_run)
@@ -303,7 +340,8 @@ def main():
clashes = find_clashes(urls)
if clashes:
print(
f"[+] {len(clashes)} filename clash(es) resolved with subfolders/suffixes")
f"[+] {len(clashes)} filename clash(es) resolved with subfolders/suffixes"
)
already = [u for u in urls if paths[u].exists()]
pending = [u for u in urls if not paths[u].exists()]
@@ -316,8 +354,7 @@ def main():
return
if args.dry_run:
print(
f"\n[dry-run] Would download {len(pending)} files to {args.output}/")
print(f"\n[dry-run] Would download {len(pending)} files to {args.output}/")
for url in pending[:20]:
print(f"{paths[url].name}")
if len(pending) > 20:
@@ -330,8 +367,7 @@ def main():
sized = {u: s for u, s in remote_sizes.items() if s is not None}
total_bytes = sum(sized.values())
print(
f"[+] Download size: {fmt_size(total_bytes)} across {len(pending)} files")
print(f"[+] Download size: {fmt_size(total_bytes)} across {len(pending)} files")
if already:
print(f"[+] Verifying {len(already)} existing files…")
@@ -344,14 +380,15 @@ def main():
remote = already_sizes.get(url)
if remote and local != remote:
mismatched += 1
print(f"[!] Size mismatch: {dest.name} "
f"(local {fmt_size(local)} vs remote {fmt_size(remote)})")
print(
f"[!] Size mismatch: {dest.name} "
f"(local {fmt_size(local)} vs remote {fmt_size(remote)})"
)
pending.append(url)
remote_sizes[url] = remote
if mismatched:
print(
f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch")
print(f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch")
print(f"\n[⚡] Downloading with {args.workers} threads…\n")
@@ -361,7 +398,7 @@ def main():
total = len(pending)
interrupted = False
def do_download(url):
def do_download(url: str) -> tuple[str, tuple[str, int]]:
dest = paths[url]
expected = remote_sizes.get(url)
return url, download_one(session, url, dest, expected)
@@ -376,11 +413,9 @@ def main():
name = paths[url].name
if status == "ok" and written > 0:
print(
f" [{completed}/{total}] ✓ {name} ({fmt_size(written)})")
print(f" [{completed}/{total}] ✓ {name} ({fmt_size(written)})")
elif status == "ok":
print(
f" [{completed}/{total}] ✓ {name} (already complete)")
print(f" [{completed}/{total}] ✓ {name} (already complete)")
elif status == "size_mismatch":
print(f" [{completed}/{total}] ⚠ {name} (size mismatch)")
failed.append(url)