Multi-gooner support because who knows

2026-06-30 00:27:13 +00:00 · 2026-03-01 01:36:01 +01:00
parent 80444405e9
commit 4a5b0a6ee3
15 changed files with 16378 additions and 11575 deletions
@@ -1,8 +1,17 @@
-# Copy your wordpress_logged_in_... cookie from browser DevTools → Storage → Cookies.
+# jailbirdz.com credentials
-# Paste the full name=value pair below.
+# These are used to obtain a fresh login cookie via the WooCommerce AJAX endpoint.
-# wordpress_sec_... is the wp-admin cookie — irrelevant for read-only viewers.
+JAILBIRDZ_USERNAME=your-email-or-username
-# __cf_bm is a Cloudflare bot-management cookie — also not needed.
+JAILBIRDZ_PASSWORD=your-password
-WP_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
+
 # Alternatively, set the cookie manually (expires in ~2 weeks).
 # Get it from browser DevTools → Storage → Cookies while on jailbirdz.com.
 # Copy the full name=value of the wordpress_logged_in_* cookie.
 JAILBIRDZ_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
 # pinkcuffs.com credentials (separate membership — different from jailbirdz)
 PINKCUFFS_USERNAME=your-email-or-username
 PINKCUFFS_PASSWORD=your-password
 PINKCUFFS_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
 # PeerTube upload target
 PEERTUBE_URL=https://your-peertube-instance.example
@@ -0,0 +1,49 @@
 name: Nightly Index
 on:
  schedule:
    - cron: '0 3 * * *'  # 03:00 UTC daily
  workflow_dispatch:      # manual trigger via GitHub UI
 permissions:
  contents: write         # needed to push video_map.json back
 concurrency:
  group: nightly-index
  cancel-in-progress: false  # let an in-progress scrape finish; queue the next run
 jobs:
  index:
    runs-on: ubuntu-latest
    timeout-minutes: 300  # 5 h ceiling; scraper resumes where it left off on next run
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: pip
      - name: Install dependencies
        run: pip install -r requirements.txt
      - name: Install Playwright Firefox
        run: playwright install firefox --with-deps
      - name: Run scraper
        run: python main.py
        env:
          JAILBIRDZ_USERNAME: ${{ secrets.JAILBIRDZ_USERNAME }}
          JAILBIRDZ_PASSWORD: ${{ secrets.JAILBIRDZ_PASSWORD }}
          PINKCUFFS_USERNAME: ${{ secrets.PINKCUFFS_USERNAME }}
          PINKCUFFS_PASSWORD: ${{ secrets.PINKCUFFS_PASSWORD }}
      - name: Commit updated video_map.json
        if: always()  # save progress even if main.py crashed or timed out
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add video_map.json
          git diff --staged --quiet || git commit -m "chore: nightly index update [skip ci]"
          git push
@@ -1,5 +1,14 @@
 # Temporary cache
 __pycache__/
 .ruff_cache/
 # Local IDE config
 .vscode
 # Project output & artifacts
 downloads/
 *.mp4
 *.mp4.part
 # Secrets & sensitive info
 .env
@@ -1,4 +0,0 @@
 {
    "snyk.advanced.organization": "512ef4a1-6034-4537-a391-9692d282122a",
    "snyk.advanced.autoSelectOrganization": true
 }
@@ -1,6 +1,6 @@
 # 𝒥𝒶𝒾𝓁𝒷𝒾𝓇𝒹𝓏-𝒹𝓁
-Jailbirdz.com is an Arizona-based subscription video site publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area, downloads the videos, and re-hosts them on a self-owned PeerTube instance.
+Jailbirdz.com and Pinkcuffs.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of one or both sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance.
 > [!NOTE]  
 > This tool does not bypass authentication, modify the site, or intercept anything it isn't entitled to. A valid, paid membership is required. The scraper authenticates using your own credentials or session cookie and accesses only content your account can already view in a browser.
@@ -19,23 +19,22 @@ Jailbirdz.com is an Arizona-based subscription video site publishing arrest and
 cp .env.example .env
 ```
-### WP_LOGIN_COOKIE
+### Credentials
-You need to be logged into jailbirdz.com in a browser. Then either:
+Set credentials for whichever sites you have a membership on. You don't need both.
-**Option A — auto (recommended):** let `grab_cookie.py` read it from your browser and write it to `.env` automatically:
+**Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` equivalents) in `.env`. `main.py` logs in automatically on startup.
-```bash
+**Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie.
 python grab_cookie.py              # tries Firefox, Chrome, Edge, Brave in order
 python grab_cookie.py -b firefox   # or target a specific browser
 ```
-> **Note:** Chrome and Edge on Windows 130+ require the script to run as Administrator due to App-bound Encryption. Firefox works without elevated privileges.
+Sites with no credentials are skipped automatically when running `python main.py`.
-**Option B — manual:** open `.env` and set `WP_LOGIN_COOKIE` yourself. Get the value from browser DevTools → Storage → Cookies while on jailbirdz.com — copy the full `name=value` of the `wordpress_logged_in_*` cookie.
+### `.env` values
 ### Other `.env` values
 - `JAILBIRDZ_USERNAME` / `JAILBIRDZ_PASSWORD` — jailbirdz.com login.
 - `JAILBIRDZ_LOGIN_COOKIE` — jailbirdz.com session cookie (fallback).
 - `PINKCUFFS_USERNAME` / `PINKCUFFS_PASSWORD` — pinkcuffs.com login.
 - `PINKCUFFS_LOGIN_COOKIE` — pinkcuffs.com session cookie (fallback).
 - `PEERTUBE_URL` — base URL of your PeerTube instance.
 - `PEERTUBE_USER` — PeerTube username.
 - `PEERTUBE_CHANNEL` — channel to upload to.
@@ -48,7 +47,9 @@ python grab_cookie.py -b firefox   # or target a specific browser
 Discovers all post URLs via the WordPress REST API, then visits each page with a headless Firefox browser to intercept video network requests (MP4, MOV, WebM, AVI, M4V).
 ```bash
-python main.py
+python main.py                    # scrape all sites you have credentials for
 python main.py --site jailbirdz   # scrape one site only
 python main.py --site pinkcuffs --site jailbirdz  # explicit multi-site
 ```
 Results are written to `video_map.json`. Safe to re-run — already-scraped posts are skipped.
@@ -65,6 +66,7 @@ Options:
      --reorganize      Rename existing files to match current naming mode
  -w, --workers N       Concurrent downloads (default: 4)
  -n, --dry-run         Print what would be downloaded
      --site SITE       Limit to the given site (jailbirdz or pinkcuffs); repeat to select several
 ```
 Resumes partial downloads. The chosen naming mode is saved to `.naming_mode` inside the output directory and persists across runs. Filenames that would clash are placed into subfolders.
@@ -89,6 +91,30 @@ Options:
 Uploads in resumable 10 MB chunks. After each batch, waits for transcoding and object storage to complete before uploading the next batch — this prevents disk exhaustion on the PeerTube server. Videos already present on the channel (matched by name) are skipped. Progress is tracked in `.uploaded` inside the input directory.
 ## CI / Nightly Indexing
 `.github/workflows/nightly-index.yml` runs `main.py` at 03:00 UTC daily and commits any new `video_map.json` entries back to the repo.
 **One-time setup — add repo secrets for each site you have a membership on:**
 ```bash
 # jailbirdz (if you have a membership)
 gh secret set JAILBIRDZ_USERNAME
 gh secret set JAILBIRDZ_PASSWORD
 # pinkcuffs (if you have a membership)
 gh secret set PINKCUFFS_USERNAME
 gh secret set PINKCUFFS_PASSWORD
 ```
 **Seed CI with your current progress before the first run:**
 ```bash
 git add video_map.json && git commit -m "chore: seed video_map" && git push
 ```
 **Trigger manually:** Actions → Nightly Index → Run workflow.
 ## Utilities
 ### Check for filename clashes
@@ -5,42 +5,142 @@ Importable functions:
    find_clashes(urls)         - {filename: [urls]} for filenames with >1 source
    build_download_paths(urls, output_dir) - {url: local_path} with clash resolution
    fmt_size(bytes)            - human-readable size string
-    get_remote_size(session, url) - file size via HEAD without downloading
+    get_remote_size(session, url, referer) - file size via HEAD without downloading
-    fetch_sizes(urls, workers, on_progress) - bulk size lookup
+    fetch_sizes(urls, workers, on_progress, url_referers, session) - bulk size lookup
    make_session()             - requests.Session with required headers
-    load_video_map()           - load video_map.json, returns {} on missing/corrupt
+    load_video_map(site, path) - load video_map.json; auto-migrates old flat format
    save_video_map(video_map, site_key, path) - atomic write of one site's entries
    build_url_referers(video_map) - {cdn_url: referer} derived from page URL keys
    is_valid_url(url)          - True if url is a plain http(s) URL with no HTML artefacts
    expects_video(url)         - True if url is a members-only video page
 """
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path, PurePosixPath
 from typing import Any, Optional, cast
 from collections.abc import Callable
 from urllib.parse import urlparse, unquote
 import json
 import os
 import tempfile
 import requests
 from config import BASE_URL
-REFERER = f"{BASE_URL}/"
+VIDEO_MAP_FILE: str = "video_map.json"
-VIDEO_MAP_FILE = "video_map.json"
+VIDEO_EXTS: set[str] = {".mp4", ".mov", ".m4v", ".webm", ".avi"}
 VIDEO_EXTS = {".mp4", ".mov", ".m4v", ".webm", ".avi"}
-def load_video_map():
+def is_valid_url(url: str) -> bool:
-    if Path(VIDEO_MAP_FILE).exists():
+    """True if url is a plain http(s) URL with no HTML artefacts (<, >, href= etc.)."""
    return (
        url.startswith("http")
        and "<" not in url
        and ">" not in url
        and " href=" not in url
    )
 def expects_video(url: str) -> bool:
    """Report whether *url* is a members-only video page expected to hold a video.

    The decision is a plain substring match on the "/pinkcuffs-videos/" path
    segment; the URL is not parsed or normalised first.
    """
    marker = "/pinkcuffs-videos/"
    return url.find(marker) != -1
 def _write_video_map_atomic(data: dict[str, Any], path: Path) -> None:
    """Write the full nested video_map dict to disk atomically via a temp file."""
    fd, tmp = tempfile.mkstemp(dir=path.resolve().parent, suffix=".tmp")
    try:
-            with open(VIDEO_MAP_FILE, encoding="utf-8") as f:
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
-                return json.load(f)
+            json.dump(data, f, indent=2, ensure_ascii=False)
        Path(tmp).replace(path)
    except Exception:
        try:
            Path(tmp).unlink()
        except OSError:
            pass
        raise
 def load_video_map(
    site: str | None = None,
    path: str | Path = VIDEO_MAP_FILE,
 ) -> dict[str, Any]:
    """Load video_map.json.
    Args:
        site: If given, return only that site's inner dict {url: entry}.
              If None, return a flat-merged dict across all sites.
        path: Path to the JSON file (injectable for tests).
    """
    p = Path(path)
    if not p.exists():
        return {}
    try:
        with open(p, encoding="utf-8") as f:
            raw: Any = json.load(f)
        data = cast(dict[str, Any], raw)
    except (json.JSONDecodeError, OSError):
        return {}
-    return {}
+
    if site is not None:
        return cast(dict[str, Any], data.get(site, {}))
    # Merge all sites into a flat dict for backward-compat callers
    merged: dict[str, Any] = {}
    for site_entries in data.values():
        if isinstance(site_entries, dict):
            merged.update(cast(dict[str, Any], site_entries))
    return merged
-def make_session():
+def save_video_map(
-    s = requests.Session()
+    video_map: dict[str, Any],
-    s.headers.update({"Referer": REFERER})
+    site_key: str,
-    return s
+    path: str | Path = VIDEO_MAP_FILE,
 ) -> None:
    """Atomically update one site's entries in the nested video_map.json.
    Args:
        video_map: The inner {url: entry} dict for site_key.
        site_key:  Which top-level key to update (e.g. "jailbirdz").
        path:      Path to the JSON file (injectable for tests).
    """
    p = Path(path)
    if p.exists():
        try:
            with open(p, encoding="utf-8") as f:
                raw: Any = json.load(f)
            full = cast(dict[str, Any], raw)
        except (json.JSONDecodeError, OSError):
            full = {}
    else:
        full = {}
    full[site_key] = video_map
    _write_video_map_atomic(full, p)
-def fmt_size(b):
+def build_url_referers(video_map: dict[str, Any]) -> dict[str, str]:
    """Pure function: return {cdn_video_url: site_referer} from a flat video map.
    The flat video map has page URLs as keys; the scheme+netloc of each page URL
    is used as the Referer for all CDN video URLs found in that entry.
    """
    result: dict[str, str] = {}
    for page_url, entry in video_map.items():
        parsed = urlparse(page_url)
        referer = f"{parsed.scheme}://{parsed.netloc}/"
        for vid in cast(dict[str, Any], entry).get("videos", []):
            if isinstance(vid, str):
                result.setdefault(vid, referer)
    return result
 def make_session() -> requests.Session:
    """Create and return a fresh ``requests.Session`` with no extra headers set."""
    session = requests.Session()
    return session
 def fmt_size(b: float | int) -> str:
    for unit in ("B", "KB", "MB", "GB"):
        if b < 1024:
            return f"{b:.1f} {unit}"
@@ -48,30 +148,34 @@ def fmt_size(b):
    return f"{b:.1f} TB"
-def url_to_filename(url):
+def url_to_filename(url: str) -> str:
    return unquote(PurePosixPath(urlparse(url).path).name)
-def find_clashes(urls):
+def find_clashes(urls: list[str]) -> dict[str, list[str]]:
    # Case-insensitive grouping so that e.g. "DaisyArrest.mp4" and
    # "daisyarrest.mp4" are treated as a clash.  This is required for
    # correctness on case-insensitive filesystems (NTFS, exFAT, macOS HFS+)
    # and harmless on case-sensitive ones (ext4) — the actual filenames on
    # disk keep their original casing; only the clash *detection* is folded.
-    by_lower = defaultdict(list)
+    by_lower: defaultdict[str, list[str]] = defaultdict(list)
    for url in urls:
        by_lower[url_to_filename(url).lower()].append(url)
-    return {url_to_filename(srcs[0]): srcs
+    return {
-            for srcs in by_lower.values() if len(srcs) > 1}
+        url_to_filename(srcs[0]): srcs for srcs in by_lower.values() if len(srcs) > 1
    }
-def _clash_subfolder(url):
+def _clash_subfolder(url: str) -> str:
    """Parent path segment used as disambiguator for clashing filenames."""
    parts = urlparse(url).path.rstrip("/").split("/")
    return unquote(parts[-2]) if len(parts) >= 2 else "unknown"
-def build_download_paths(urls, output_dir):
+def build_download_paths(
    urls: list[str],
    output_dir: str | Path,
 ) -> dict[str, Path]:
    """Map each URL to a local file path. Flat layout; clashing names get a subfolder."""
    clashes = find_clashes(urls)
    clash_lower = {name.lower() for name in clashes}
@@ -86,16 +190,25 @@ def build_download_paths(urls, output_dir):
    return paths
-def get_remote_size(session, url):
+def get_remote_size(
    session: requests.Session,
    url: str,
    referer: str = "",
 ) -> int | None:
    extra = {"Referer": referer} if referer else {}
    try:
-        r = session.head(url, allow_redirects=True, timeout=15)
+        r = session.head(url, headers=extra, allow_redirects=True, timeout=15)
        if r.status_code < 400 and "Content-Length" in r.headers:
            return int(r.headers["Content-Length"])
    except Exception:
        pass
    try:
        r = session.get(
-            url, headers={"Range": "bytes=0-0"}, stream=True, timeout=15)
+            url,
            headers={"Range": "bytes=0-0", **extra},
            stream=True,
            timeout=15,
        )
        r.close()
        cr = r.headers.get("Content-Range", "")
        if "/" in cr:
@@ -105,19 +218,30 @@ def get_remote_size(session, url):
    return None
-def fetch_sizes(urls, workers=20, on_progress=None):
+def fetch_sizes(
    urls: list[str],
    workers: int = 20,
    on_progress: Callable[[int, int], None] | None = None,
    url_referers: dict[str, str] | None = None,
    session: requests.Session | None = None,
 ) -> dict[str, int | None]:
    """Return {url: size_or_None}. on_progress(done, total) called after each URL."""
    if session is None:
        session = make_session()
-    sizes = {}
+    referers = url_referers or {}
    sizes: dict[str, int | None] = {}
    total = len(urls)
    with ThreadPoolExecutor(max_workers=workers) as pool:
-        futures = {pool.submit(get_remote_size, session, u): u for u in urls}
+        futures = {
            pool.submit(get_remote_size, session, u, referers.get(u, "")): u
            for u in urls
        }
        done = 0
        for fut in as_completed(futures):
            sizes[futures[fut]] = fut.result()
            done += 1
-            if on_progress:
+            if on_progress is not None:
                on_progress(done, total)
    return sizes
@@ -125,14 +249,20 @@ def fetch_sizes(urls, workers=20, on_progress=None):
 # --------------- CLI ---------------
-def main():
+
 def main() -> None:
    vm = load_video_map()
-    urls = [u for entry in vm.values() for u in entry.get("videos", []) if u.startswith("http")]
+    urls = [
        u
        for entry in vm.values()
        for u in entry.get("videos", [])
        if u.startswith("http")
    ]
    clashes = find_clashes(urls)
    print(f"Total URLs: {len(urls)}")
-    by_name = defaultdict(list)
+    by_name: defaultdict[str, list[str]] = defaultdict(list)
    for url in urls:
        by_name[url_to_filename(url)].append(url)
    print(f"Unique filenames: {len(by_name)}")
@@ -142,8 +272,9 @@ def main():
        return
    clash_urls = [u for srcs in clashes.values() for u in srcs]
    url_referers = build_url_referers(vm)
    print(f"\n[+] Fetching file sizes for {len(clash_urls)} clashing URLs…")
-    sizes = fetch_sizes(clash_urls)
+    sizes = fetch_sizes(clash_urls, url_referers=url_referers)
    print(f"\n{len(clashes)} filename clash(es):\n")
    for name, srcs in sorted(clashes.items()):
@@ -1,2 +1,15 @@
-BASE_URL = "https://www.jailbirdz.com"
+# config.py
-COOKIE_DOMAIN = "jailbirdz.com"  # rookiepy domain filter (no www)
+from typing import Final
 # Registry of supported sites, keyed by the short site name.
 # NOTE(review): the field meanings below are inferred from their names and
 # values — confirm against the code that reads them.
 #   base_url      - presumably the site's root URL (scheme + "www." host)
 #   cookie_domain - the site's domain without the "www." prefix; presumably
 #                   used to match cookies for that site
 #   env_prefix    - presumably the prefix of the site's environment variables
 #                   (e.g. JAILBIRDZ_USERNAME, PINKCUFFS_LOGIN_COOKIE)
 SITES: Final[dict[str, dict[str, str]]] = {
    "jailbirdz": {
        "base_url": "https://www.jailbirdz.com",
        "cookie_domain": "jailbirdz.com",
        "env_prefix": "JAILBIRDZ",
    },
    "pinkcuffs": {
        "base_url": "https://www.pinkcuffs.com",
        "cookie_domain": "pinkcuffs.com",
        "env_prefix": "PINKCUFFS",
    },
 }
@@ -11,12 +11,13 @@ Usage:
 """
 import argparse
 import json
 from pathlib import Path
 import re
 import shutil
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any, Optional
 import requests
 from check_clashes import (
    make_session,
@@ -24,33 +25,38 @@ from check_clashes import (
    url_to_filename,
    find_clashes,
    build_download_paths,
    build_url_referers,
    fetch_sizes,
    load_video_map,
    is_valid_url,
    VIDEO_MAP_FILE,
 )
 from config import SITES
 VIDEO_MAP_FILE = "video_map.json"
 CHUNK_SIZE = 8 * 1024 * 1024
-DEFAULT_OUTPUT = "downloads"
+DEFAULT_OUTPUT: str = "downloads"
-DEFAULT_WORKERS = 4
+DEFAULT_WORKERS: int = 4
-MODE_FILE = ".naming_mode"
+MODE_FILE: str = ".naming_mode"
-MODE_ORIGINAL = "original"
+MODE_ORIGINAL: str = "original"
-MODE_TITLE = "title"
+MODE_TITLE: str = "title"
 # ── Naming mode persistence ──────────────────────────────────────────
-def read_mode(output_dir):
+
 def read_mode(output_dir: str | Path) -> str | None:
    """Return the naming mode saved in *output_dir*, or None if no marker exists.

    The mode is read from the MODE_FILE marker inside the directory and
    returned with surrounding whitespace stripped.
    """
    marker = Path(output_dir) / MODE_FILE
    if not marker.exists():
        return None
    return marker.read_text().strip()
-def write_mode(output_dir, mode):
+def write_mode(output_dir: str | Path, mode: str) -> None:
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    (Path(output_dir) / MODE_FILE).write_text(mode)
-def resolve_mode(args):
+def resolve_mode(args: argparse.Namespace) -> str:
    """Determine naming mode from CLI flags + saved marker. Returns mode string."""
    saved = read_mode(args.output)
@@ -69,13 +75,18 @@ def resolve_mode(args):
 # ── Filename helpers ─────────────────────────────────────────────────
-def sanitize_filename(title, max_len=180):
+
-    name = re.sub(r'[<>:"/\\|?*]', '', title)
+def sanitize_filename(title: str, max_len: int = 180) -> str:
-    name = re.sub(r'\s+', ' ', name).strip().rstrip('.')
+    name = re.sub(r'[<>:"/\\|?*]', "", title)
    name = re.sub(r"\s+", " ", name).strip().rstrip(".")
    return name[:max_len].rstrip() if len(name) > max_len else name
-def build_title_paths(urls, url_to_title, output_dir):
+def build_title_paths(
    urls: list[str],
    url_to_title: dict[str, str],
    output_dir: str | Path,
 ) -> dict[str, Path]:
    name_to_urls = defaultdict(list)
    url_to_base = {}
@@ -91,14 +102,33 @@ def build_title_paths(urls, url_to_title, output_dir):
        base, ext = url_to_base[url]
        full = base + ext
        if len(name_to_urls[full]) > 1:
-            slug = url_to_filename(url).rsplit('.', 1)[0]
+            slug = url_to_filename(url).rsplit(".", 1)[0]
            paths[url] = Path(output_dir) / f"{base} [{slug}]{ext}"
        else:
            paths[url] = Path(output_dir) / full
    return paths
-def get_paths_for_mode(mode, urls, video_map, output_dir):
+def get_paths_for_mode(
    mode: str,
    urls: list[str],
    video_map: dict[str, Any],
    output_dir: str | Path,
    url_to_site: dict[str, str] | None = None,
 ) -> dict[str, Path]:
    if url_to_site:
        by_site: dict[str, list[str]] = defaultdict(list)
        for u in urls:
            by_site[url_to_site.get(u, "")].append(u)
        paths: dict[str, Path] = {}
        url_title = build_url_title_map(video_map) if mode == MODE_TITLE else {}
        for site, site_urls in by_site.items():
            base = Path(output_dir) / site if site else Path(output_dir)
            if mode == MODE_TITLE:
                paths.update(build_title_paths(site_urls, url_title, base))
            else:
                paths.update(build_download_paths(site_urls, base))
        return paths
    if mode == MODE_TITLE:
        url_title = build_url_title_map(video_map)
        return build_title_paths(urls, url_title, output_dir)
@@ -107,11 +137,21 @@ def get_paths_for_mode(mode, urls, video_map, output_dir):
 # ── Reorganize ───────────────────────────────────────────────────────
-def reorganize(urls, video_map, output_dir, target_mode, dry_run=False):
+
 def reorganize(
    urls: list[str],
    video_map: dict[str, Any],
    output_dir: str | Path,
    target_mode: str,
    dry_run: bool = False,
    url_to_site: dict[str, str] | None = None,
 ) -> None:
    """Rename existing files from one naming scheme to another."""
    other_mode = MODE_TITLE if target_mode == MODE_ORIGINAL else MODE_ORIGINAL
-    old_paths = get_paths_for_mode(other_mode, urls, video_map, output_dir)
+    old_paths = get_paths_for_mode(other_mode, urls, video_map, output_dir, url_to_site)
-    new_paths = get_paths_for_mode(target_mode, urls, video_map, output_dir)
+    new_paths = get_paths_for_mode(
        target_mode, urls, video_map, output_dir, url_to_site
    )
    moves = []
    for url in urls:
@@ -163,21 +203,30 @@ def reorganize(urls, video_map, output_dir, target_mode, dry_run=False):
 # ── Download ─────────────────────────────────────────────────────────
-def download_one(session, url, dest, expected_size):
+
 def download_one(
    session: requests.Session,
    url: str,
    dest: str | Path,
    expected_size: int | None,
    referer: str = "",
 ) -> tuple[str, int]:
    dest = Path(dest)
    part = dest.parent / (dest.name + ".part")
    dest.parent.mkdir(parents=True, exist_ok=True)
    if dest.exists():
        local = dest.stat().st_size
-        if expected_size and local == expected_size:
+        if expected_size is not None and local == expected_size:
            return "ok", 0
-        if expected_size and local != expected_size:
+        if expected_size is not None and local != expected_size:
            dest.unlink()
    existing = part.stat().st_size if part.exists() else 0
-    headers = {}
+    headers: dict[str, str] = {}
-    if existing and expected_size and existing < expected_size:
+    if referer:
        headers["Referer"] = referer
    if existing and expected_size is not None and existing < expected_size:
        headers["Range"] = f"bytes={existing}-"
    try:
@@ -205,33 +254,21 @@ def download_one(session, url, dest, expected_size):
        return f"error: {e}", written
    final_size = existing + written
-    if expected_size and final_size != expected_size:
+    if expected_size is not None and final_size != expected_size:
        return "size_mismatch", written
    part.rename(dest)
    return "ok", written
-# ── Data loading ─────────────────────────────────────────────────────
+def collect_urls(video_map: dict[str, Any]) -> list[str]:
 def load_video_map():
    with open(VIDEO_MAP_FILE, encoding="utf-8") as f:
        return json.load(f)
 def _is_valid_url(url):
    return url.startswith(
        "http") and "<" not in url and ">" not in url and " href=" not in url
 def collect_urls(video_map):
    urls, seen, skipped = [], set(), 0
    for entry in video_map.values():
        for video_url in entry.get("videos", []):
            if video_url in seen:
                continue
            seen.add(video_url)
-            if _is_valid_url(video_url):
+            if is_valid_url(video_url):
                urls.append(video_url)
            else:
                skipped += 1
@@ -240,7 +277,7 @@ def collect_urls(video_map):
    return urls
-def build_url_title_map(video_map):
+def build_url_title_map(video_map: dict[str, Any]) -> dict[str, str]:
    url_title = {}
    for entry in video_map.values():
        title = entry.get("title", "")
@@ -252,28 +289,68 @@ def build_url_title_map(video_map):
 # ── Main ─────────────────────────────────────────────────────────────
-def main():
+
-    parser = argparse.ArgumentParser(
+def main() -> None:
-        description="Download videos from video_map.json")
+    parser = argparse.ArgumentParser(description="Download videos from video_map.json")
-    parser.add_argument("--output", "-o", default=DEFAULT_OUTPUT,
+    parser.add_argument(
-                        help=f"Download directory (default: {DEFAULT_OUTPUT})")
+        "--output",
        "-o",
        default=DEFAULT_OUTPUT,
        help=f"Download directory (default: {DEFAULT_OUTPUT})",
    )
    naming = parser.add_mutually_exclusive_group()
-    naming.add_argument("--titles", "-t", action="store_true",
+    naming.add_argument(
-                        help="Use title-based filenames (saved as default for this directory)")
+        "--titles",
-    naming.add_argument("--original", action="store_true",
+        "-t",
-                        help="Use original CloudFront filenames (saved as default for this directory)")
+        action="store_true",
        help="Use title-based filenames (saved as default for this directory)",
    )
    naming.add_argument(
        "--original",
        action="store_true",
        help="Use original CloudFront filenames (saved as default for this directory)",
    )
-    parser.add_argument("--reorganize", action="store_true",
+    parser.add_argument(
-                        help="Rename existing files to match the current naming mode")
+        "--reorganize",
-    parser.add_argument("--dry-run", "-n", action="store_true",
+        action="store_true",
-                        help="Preview without making changes")
+        help="Rename existing files to match the current naming mode",
-    parser.add_argument("--workers", "-w", type=int, default=DEFAULT_WORKERS,
+    )
-                        help=f"Concurrent downloads (default: {DEFAULT_WORKERS})")
+    parser.add_argument(
        "--dry-run", "-n", action="store_true", help="Preview without making changes"
    )
    parser.add_argument(
        "--workers",
        "-w",
        type=int,
        default=DEFAULT_WORKERS,
        help=f"Concurrent downloads (default: {DEFAULT_WORKERS})",
    )
    parser.add_argument(
        "--site",
        action="append",
        choices=list(SITES.keys()),
        dest="sites",
        metavar="SITE",
        help=f"Site(s) to download (default: all). Can be repeated. Choices: {', '.join(SITES)}",
    )
    args = parser.parse_args()
    video_map = load_video_map()
    url_referers = build_url_referers(video_map)
    urls = collect_urls(video_map)
    url_to_site: dict[str, str] = {}
    for site_key in SITES:
        for entry in load_video_map(site_key).values():
            for vid_url in entry.get("videos", []):
                url_to_site[vid_url] = site_key
    if args.sites:
        selected = set(args.sites)
        urls = [u for u in urls if url_to_site.get(u) in selected]
    mode = resolve_mode(args)
    saved = read_mode(args.output)
@@ -287,10 +364,18 @@ def main():
        if mode_changed and not args.reorganize:
            print(f"\n[!] Mode changed from '{saved}' to '{mode}'.")
            print(
-                "    Use --reorganize to rename existing files, or --dry-run to preview.")
+                "    Use --reorganize to rename existing files, or --dry-run to preview."
            )
            print("    Refusing to download until existing files are reorganized.")
            return
-        reorganize(urls, video_map, args.output, mode, dry_run=args.dry_run)
+        reorganize(
            urls,
            video_map,
            args.output,
            mode,
            dry_run=args.dry_run,
            url_to_site=url_to_site,
        )
        if args.dry_run or args.reorganize:
            return
@@ -298,12 +383,13 @@ def main():
    if not args.dry_run:
        write_mode(args.output, mode)
-    paths = get_paths_for_mode(mode, urls, video_map, args.output)
+    paths = get_paths_for_mode(mode, urls, video_map, args.output, url_to_site)
    clashes = find_clashes(urls)
    if clashes:
        print(
-            f"[+] {len(clashes)} filename clash(es) resolved with subfolders/suffixes")
+            f"[+] {len(clashes)} filename clash(es) resolved with subfolders/suffixes"
        )
    already = [u for u in urls if paths[u].exists()]
    pending = [u for u in urls if not paths[u].exists()]
@@ -316,8 +402,7 @@ def main():
        return
    if args.dry_run:
-        print(
+        print(f"\n[dry-run] Would download {len(pending)} files to {args.output}/")
            f"\n[dry-run] Would download {len(pending)} files to {args.output}/")
        for url in pending[:20]:
            print(f"  → {paths[url].name}")
        if len(pending) > 20:
@@ -326,16 +411,15 @@ def main():
    print("\n[+] Fetching remote file sizes…")
    session = make_session()
-    remote_sizes = fetch_sizes(pending, workers=20)
+    remote_sizes = fetch_sizes(pending, workers=20, url_referers=url_referers)
    sized = {u: s for u, s in remote_sizes.items() if s is not None}
    total_bytes = sum(sized.values())
-    print(
+    print(f"[+] Download size: {fmt_size(total_bytes)} across {len(pending)} files")
        f"[+] Download size: {fmt_size(total_bytes)} across {len(pending)} files")
    if already:
        print(f"[+] Verifying {len(already)} existing files…")
-        already_sizes = fetch_sizes(already, workers=20)
+        already_sizes = fetch_sizes(already, workers=20, url_referers=url_referers)
    mismatched = 0
    for url in already:
@@ -344,14 +428,15 @@ def main():
        remote = already_sizes.get(url)
        if remote and local != remote:
            mismatched += 1
-            print(f"[!] Size mismatch: {dest.name} "
+            print(
-                  f"(local {fmt_size(local)} vs remote {fmt_size(remote)})")
+                f"[!] Size mismatch: {dest.name} "
                f"(local {fmt_size(local)} vs remote {fmt_size(remote)})"
            )
            pending.append(url)
            remote_sizes[url] = remote
    if mismatched:
-        print(
+        print(f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch")
            f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch")
    print(f"\n[⚡] Downloading with {args.workers} threads…\n")
@@ -361,10 +446,12 @@ def main():
    total = len(pending)
    interrupted = False
-    def do_download(url):
+    def do_download(url: str) -> tuple[str, tuple[str, int]]:
        dest = paths[url]
        expected = remote_sizes.get(url)
-        return url, download_one(session, url, dest, expected)
+        return url, download_one(
            session, url, dest, expected, url_referers.get(url, "")
        )
    try:
        with ThreadPoolExecutor(max_workers=args.workers) as pool:
@@ -376,11 +463,9 @@ def main():
                name = paths[url].name
                if status == "ok" and written > 0:
-                    print(
+                    print(f"  [{completed}/{total}] ✓ {name} ({fmt_size(written)})")
                        f"  [{completed}/{total}] ✓ {name} ({fmt_size(written)})")
                elif status == "ok":
-                    print(
+                    print(f"  [{completed}/{total}] ✓ {name} (already complete)")
                        f"  [{completed}/{total}] ✓ {name} (already complete)")
                elif status == "size_mismatch":
                    print(f"  [{completed}/{total}] ⚠ {name} (size mismatch)")
                    failed.append(url)
@@ -1,113 +1,130 @@
 #!/usr/bin/env python3
 """
-grab_cookie.py — read the WordPress login cookie from an
+grab_cookie.py — log in to a site and write the session cookie to .env.
-installed browser and write it to .env as WP_LOGIN_COOKIE=name=value.
+
 Requires {SITE}_USERNAME and {SITE}_PASSWORD to be set in the environment or .env.
 Usage:
-    python grab_cookie.py                        # tries Firefox, Chrome, Edge, Brave
+    python grab_cookie.py --site jailbirdz
-    python grab_cookie.py --browser firefox      # explicit browser
+    python grab_cookie.py --site pinkcuffs
 """
 import argparse
 import os
 from pathlib import Path
-from config import COOKIE_DOMAIN
+from typing import Literal
 import requests
 from config import SITES
 ENV_FILE = Path(".env")
 ENV_KEY = "WP_LOGIN_COOKIE"
 COOKIE_PREFIX = "wordpress_logged_in_"
 BROWSER_NAMES = ["firefox", "chrome", "edge", "brave"]
 def update_env(
    name: str,
    value: str,
    env_key: str = "WP_LOGIN_COOKIE",
    path: Path = ENV_FILE,
 ) -> Literal["updated", "appended", "created"]:
    """Write env_key=name=value into the env file, replacing any existing line."""
    new_line = f"{env_key}={name}={value}\n"
-def find_cookie(browser_name):
+    if path.exists():
-    """Return (name, value) for the wordpress_logged_in_* cookie, or (None, None)."""
+        text = path.read_text(encoding="utf-8")
    try:
        import rookiepy
    except ImportError:
        raise ImportError("rookiepy not installed — run: pip install rookiepy")
    fn = getattr(rookiepy, browser_name, None)
    if fn is None:
        raise ValueError(f"rookiepy does not support '{browser_name}'.")
    try:
        cookies = fn([COOKIE_DOMAIN])
    except PermissionError:
        raise PermissionError(
            f"Permission denied reading {browser_name} cookies.\n"
            "    Close the browser, or on Windows run as Administrator for Chrome/Edge."
        )
    except Exception as e:
        raise RuntimeError(f"Could not read {browser_name} cookies: {e}")
    for c in cookies:
        if c.get("name", "").startswith(COOKIE_PREFIX):
            return c["name"], c["value"]
    return None, None
 def update_env(name, value):
    """Write WP_LOGIN_COOKIE=name=value into .env, replacing any existing line."""
    new_line = f"{ENV_KEY}={name}={value}\n"
    if ENV_FILE.exists():
        text = ENV_FILE.read_text(encoding="utf-8")
        lines = text.splitlines(keepends=True)
        for i, line in enumerate(lines):
-            if line.startswith(f"{ENV_KEY}=") or line.strip() == ENV_KEY:
+            key, sep, _ = line.partition("=")
            if key.strip() == env_key and sep:
                lines[i] = new_line
-                ENV_FILE.write_text("".join(lines), encoding="utf-8")
+                path.write_text("".join(lines), encoding="utf-8")
                return "updated"
        # Key not present — append
        if text and not text.endswith("\n"):
            text += "\n"
-        ENV_FILE.write_text(text + new_line, encoding="utf-8")
+        path.write_text(text + new_line, encoding="utf-8")
        return "appended"
    else:
-        ENV_FILE.write_text(new_line, encoding="utf-8")
+        path.write_text(new_line, encoding="utf-8")
        return "created"
-def main():
+def login_and_get_cookie(
    username: str, password: str, base_url: str
 ) -> tuple[str, str]:
    """POST to wp-admin/admin-ajax.php (xootix action) and return (cookie_name, cookie_value).
    No browser needed — the xootix login endpoint takes plain form fields and returns
    the wordpress_logged_in_* cookie directly in the response Set-Cookie headers.
    """
    session = requests.Session()
    r = session.post(
        f"{base_url}/wp-admin/admin-ajax.php",
        data={
            "xoo-el-username": username,
            "xoo-el-password": password,
            "xoo-el-rememberme": "forever",
            "_xoo_el_form": "login",
            "xoo_el_redirect": "/",
            "action": "xoo_el_form_action",
            "display": "popup",
        },
        headers={
            "Referer": f"{base_url}/",
            "Origin": base_url,
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:147.0) Gecko/20100101 Firefox/147.0",
        },
        timeout=30,
    )
    r.raise_for_status()
    result = r.json()
    if result.get("error"):
        raise RuntimeError(f"Login rejected by server: {result.get('notice', result)}")
    for name, value in session.cookies.items():
        if name.startswith(COOKIE_PREFIX):
            return name, value
    raise RuntimeError(
        "Server accepted login but no wordpress_logged_in_* cookie was set.\n"
        "    Check that username and password are correct."
    )
 def _auto_login() -> None:
    parser = argparse.ArgumentParser(
-        description=f"Copy the {COOKIE_DOMAIN} login cookie from your browser into .env."
+        description="Log in and save session cookie to .env"
    )
    parser.add_argument(
-        "--browser", "-b",
+        "--site",
-        choices=BROWSER_NAMES,
+        required=True,
-        metavar="BROWSER",
+        choices=list(SITES.keys()),
-        help=f"Browser to read from: {', '.join(BROWSER_NAMES)} (default: try all in order)",
+        help="Which site to authenticate with",
    )
    args = parser.parse_args()
-    order = [args.browser] if args.browser else BROWSER_NAMES
+    site_cfg = SITES[args.site]
    env_prefix = site_cfg["env_prefix"]
    base_url = site_cfg["base_url"]
    env_key = f"{env_prefix}_LOGIN_COOKIE"
-    cookie_name = cookie_value = None
+    username = os.environ.get(f"{env_prefix}_USERNAME", "").strip()
-    for browser in order:
+    password = os.environ.get(f"{env_prefix}_PASSWORD", "").strip()
-        print(f"[…] Trying {browser}…")
+    if not username or not password:
        try:
            cookie_name, cookie_value = find_cookie(browser)
        except ImportError as e:
            raise SystemExit(f"[!] {e}")
        except (ValueError, PermissionError, RuntimeError) as e:
            print(f"[!] {e}")
            continue
        if cookie_name:
            print(f"[+] Found in {browser}: {cookie_name}")
            break
        print(f"    No {COOKIE_PREFIX}* cookie found in {browser}.")
    if not cookie_name:
        raise SystemExit(
-            f"\n[!] No {COOKIE_PREFIX}* cookie found in any browser.\n"
+            f"[!] {env_prefix}_USERNAME and {env_prefix}_PASSWORD must be set "
-            f"    Make sure you are logged into {COOKIE_DOMAIN}, then re-run.\n"
+            "in the environment or .env — see .env.example."
            "    Or set WP_LOGIN_COOKIE manually in .env — see .env.example."
        )
    try:
        cookie_name, cookie_value = login_and_get_cookie(username, password, base_url)
    except RuntimeError as e:
        raise SystemExit(f"[!] {e}")
    print(f"[+] Login succeeded: {cookie_name}")
    action = update_env(cookie_name, cookie_value, env_key=env_key)
    print(f"[✓] {env_key} {action} in {ENV_FILE}.")
-    action = update_env(cookie_name, cookie_value)
+
-    print(f"[✓] {ENV_KEY} {action} in {ENV_FILE}.")
+def main() -> None:
    _auto_login()
 if __name__ == "__main__":
@@ -1,60 +1,131 @@
 import argparse
 import re
 import json
 import os
 import time
 import signal
 import asyncio
 import tempfile
 import requests
-from pathlib import Path, PurePosixPath
+from pathlib import PurePosixPath
 from typing import Any, Optional
 from urllib.parse import urlparse
 from dotenv import load_dotenv
-from playwright.async_api import async_playwright
+from playwright.async_api import async_playwright, BrowserContext
-from check_clashes import VIDEO_EXTS
+from check_clashes import (
-from config import BASE_URL
+    VIDEO_EXTS,
    load_video_map,
    save_video_map,
    is_valid_url,
    expects_video,
 )
 from config import SITES
 from grab_cookie import login_and_get_cookie, update_env
 load_dotenv()
-def _is_video_url(url):
+def _is_video_url(url: str) -> bool:
    """True if `url` ends with a recognised video extension (case-insensitive, path only)."""
    return PurePosixPath(urlparse(url).path).suffix.lower() in VIDEO_EXTS
-WP_API = f"{BASE_URL}/wp-json/wp/v2"
+
 SKIP_TYPES = {
-    "attachment", "nav_menu_item", "wp_block", "wp_template",
+    "attachment",
-    "wp_template_part", "wp_global_styles", "wp_navigation",
+    "nav_menu_item",
-    "wp_font_family", "wp_font_face",
+    "wp_block",
    "wp_template",
    "wp_template_part",
    "wp_global_styles",
    "wp_navigation",
    "wp_font_family",
    "wp_font_face",
 }
 VIDEO_MAP_FILE = "video_map.json"
 MAX_WORKERS = 4
-API_HEADERS = {
+_USER_AGENT = (
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:147.0) Gecko/20100101 Firefox/147.0",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:147.0) Gecko/20100101 Firefox/147.0"
 )
 def _api_headers(base_url: str, cookie_name: str, cookie_value: str) -> dict[str, str]:
    return {
        "User-Agent": _USER_AGENT,
        "Accept": "application/json",
-    "Referer": f"{BASE_URL}/",
+        "Referer": f"{base_url}/",
        "Cookie": f"{cookie_name}={cookie_value}; eav-age-verified=1",
    }
-def _get_login_cookie():
+def _select_probe_url(video_map: dict[str, Any]) -> str | None:
-    raw = os.environ.get("WP_LOGIN_COOKIE", "").strip()  # strip accidental whitespace
+    """Pure function: return the first URL in video_map where expects_video() is True."""
-    if not raw:
+    return next((url for url in video_map if expects_video(url)), None)
-        raise RuntimeError(
+
-            "WP_LOGIN_COOKIE not set. Copy it from your browser into .env — see .env.example.")
+
 def _probe_cookie(name: str, value: str, site_key: str) -> bool:
    """Check a login cookie by sending one HEAD request with it attached.

    The target is the URL chosen by _select_probe_url() from the site's video
    map. Returns True only when the response status is exactly 200; redirects
    are not followed. Returns False when the map offers no URL to probe.
    """
    candidate = _select_probe_url(load_video_map(site_key))
    if candidate is None:
        # no video URLs yet — can't validate, fall through to re-auth
        return False
    probe_headers = {"Cookie": f"{name}={value}", "User-Agent": _USER_AGENT}
    response = requests.head(
        candidate,
        headers=probe_headers,
        allow_redirects=False,
        timeout=10,
    )
    return response.status_code == 200
 def _get_login_cookie(site_key: str, site_cfg: dict[str, str]) -> tuple[str, str]:
    env_prefix = site_cfg["env_prefix"]
    base_url = site_cfg["base_url"]
    env_key = f"{env_prefix}_LOGIN_COOKIE"
    username = os.environ.get(f"{env_prefix}_USERNAME", "").strip()
    password = os.environ.get(f"{env_prefix}_PASSWORD", "").strip()
    has_credentials = bool(username and password)
    raw = os.environ.get(env_key, "").strip()
    if raw:
        name, _, value = raw.partition("=")
-    if not value:
+        if value and name.startswith("wordpress_logged_in_"):
-        raise RuntimeError(
+            if not has_credentials:
-            "WP_LOGIN_COOKIE looks malformed (no '=' found). Expected: name=value")
+                return name, value  # cookie-only mode — trust it
-    if not name.startswith("wordpress_logged_in_"):
+            print(f"[{site_key}] Cookie found — validating…")
-        raise RuntimeError(
+            if _probe_cookie(name, value, site_key):
-            "WP_LOGIN_COOKIE doesn't look right — expected a wordpress_logged_in_... cookie.")
+                print(f"[{site_key}] Cookie still valid — skipping login.")
                return name, value
            print(f"[{site_key}] Cookie expired — re-authenticating…")
    if has_credentials:
        cookie_name, cookie_value = login_and_get_cookie(username, password, base_url)
        action = update_env(cookie_name, cookie_value, env_key=env_key)
        print(f"[{site_key}] Logged in: {cookie_name} ({action} in .env)")
        return cookie_name, cookie_value
    raise RuntimeError(
        f"No credentials or cookie found for {site_key}. Set either:\n"
        f"  • {env_prefix}_USERNAME + {env_prefix}_PASSWORD  (recommended)\n"
        f"  • {env_prefix}_LOGIN_COOKIE                      (fallback — may expire)\n"
        "See .env.example."
    )
-def discover_content_types(session):
+def _has_credentials(site_cfg: dict[str, str]) -> bool:
-    """Query /wp-json/wp/v2/types and return a list of (name, rest_base, type_slug) for content types worth scraping."""
+    env_prefix = site_cfg["env_prefix"]
-    r = session.get(f"{WP_API}/types", timeout=30)
+    has_cookie = bool(os.environ.get(f"{env_prefix}_LOGIN_COOKIE", "").strip())
    has_creds = bool(
        os.environ.get(f"{env_prefix}_USERNAME", "").strip()
        and os.environ.get(f"{env_prefix}_PASSWORD", "").strip()
    )
    return has_cookie or has_creds
 def discover_content_types(
    session: requests.Session, wp_api: str
 ) -> list[tuple[str, str, str]]:
    """Query /wp-json/wp/v2/types and return a list of (name, rest_base, type_slug)."""
    r = session.get(f"{wp_api}/types", timeout=30)
    r.raise_for_status()
    types = r.json()
@@ -69,16 +140,22 @@ def discover_content_types(session):
    return targets
-def fetch_all_posts_for_type(session, type_name, rest_base, type_slug):
+def fetch_all_posts_for_type(
-    """Paginate one content type and return (url, title, description) tuples.
+    session: requests.Session,
-    Uses the `link` field when available; falls back to building from slug."""
+    wp_api: str,
    base_url: str,
    type_name: str,
    rest_base: str,
    type_slug: str,
 ) -> list[tuple[str, str, str]]:
    """Paginate one content type and return (url, title, description) tuples."""
    url_prefix = type_slug.replace("_", "-")
    results = []
    page = 1
    while True:
        r = session.get(
-            f"{WP_API}/{rest_base}",
+            f"{wp_api}/{rest_base}",
            params={"per_page": 100, "page": page},
            timeout=30,
        )
@@ -92,15 +169,19 @@ def fetch_all_posts_for_type(session, type_name, rest_base, type_slug):
            if not link.startswith("http"):
                slug = post.get("slug")
                if slug:
-                    link = f"{BASE_URL}/{url_prefix}/{slug}/"
+                    link = f"{base_url}/{url_prefix}/{slug}/"
                else:
                    continue
            title_obj = post.get("title", {})
-            title = title_obj.get("rendered", "") if isinstance(
+            title = (
-                title_obj, dict) else str(title_obj)
+                title_obj.get("rendered", "")
                if isinstance(title_obj, dict)
                else str(title_obj)
            )
            content_obj = post.get("content", {})
-            content_html = content_obj.get(
+            content_html = (
-                "rendered", "") if isinstance(content_obj, dict) else ""
+                content_obj.get("rendered", "") if isinstance(content_obj, dict) else ""
            )
            description = html_to_text(content_html) if content_html else ""
            results.append((link, title, description))
        print(f"    {type_name} page {page}: {len(data)} items")
@@ -109,66 +190,88 @@ def fetch_all_posts_for_type(session, type_name, rest_base, type_slug):
    return results
-def fetch_post_urls_from_api(headers):
+def fetch_post_urls_from_api(
    site_key: str,
    base_url: str,
    wp_api: str,
    headers: dict[str, str],
 ) -> list[str]:
    """Auto-discover all content types via the WP REST API and collect every post URL.
-    Also builds video_map.json with titles pre-populated."""
+    Also pre-populates video_map.json with titles."""
-    print("[+] video_map.json empty or missing — discovering content types from REST API…")
+    print(f"[{site_key}] video_map empty — discovering content types from REST API…")
    session = requests.Session()
    session.headers.update(headers)
-    targets = discover_content_types(session)
+    targets = discover_content_types(session, wp_api)
    print(
-        f"[+] Found {len(targets)} content types: {', '.join(name for name, _, _ in targets)}\n")
+        f"[{site_key}] Found {len(targets)} content types: "
        f"{', '.join(name for name, _, _ in targets)}\n"
    )
    all_results = []
    for type_name, rest_base, type_slug in targets:
        type_results = fetch_all_posts_for_type(
-            session, type_name, rest_base, type_slug)
+            session, wp_api, base_url, type_name, rest_base, type_slug
        )
        all_results.extend(type_results)
-    seen = set()
+    seen: set[str] = set()
    deduped_urls = []
-    video_map = load_video_map()
+    video_map = load_video_map(site_key)
    for url, title, description in all_results:
        if url not in seen and url.startswith("http"):
            seen.add(url)
            deduped_urls.append(url)
            if url not in video_map:
-                video_map[url] = {"title": title,
+                video_map[url] = {
-                                  "description": description, "videos": []}
+                    "title": title,
                    "description": description,
                    "videos": [],
                }
            else:
                if not video_map[url].get("title"):
                    video_map[url]["title"] = title
                if not video_map[url].get("description"):
                    video_map[url]["description"] = description
-    save_video_map(video_map)
+    save_video_map(video_map, site_key)
    print(
-        f"\n[+] Discovered {len(deduped_urls)} unique URLs → saved to {VIDEO_MAP_FILE}")
+        f"\n[{site_key}] Discovered {len(deduped_urls)} unique URLs → saved to video_map.json"
-    print(
+    )
-        f"[+] Pre-populated {len(video_map)} entries in {VIDEO_MAP_FILE}")
+    print(f"[{site_key}] Pre-populated {len(video_map)} entries")
    return deduped_urls
-def fetch_metadata_from_api(video_map, urls, headers):
+def fetch_metadata_from_api(
    site_key: str,
    base_url: str,
    wp_api: str,
    video_map: dict[str, Any],
    urls: list[str],
    headers: dict[str, str],
 ) -> None:
    """Populate missing titles and descriptions in video_map from the REST API."""
-    missing = [u for u in urls
+    missing = [
        u
        for u in urls
        if u not in video_map
        or not video_map[u].get("title")
-               or not video_map[u].get("description")]
+        or not video_map[u].get("description")
    ]
    if not missing:
        return
-    print(f"[+] Fetching metadata from REST API for {len(missing)} posts…")
+    print(f"[{site_key}] Fetching metadata from REST API for {len(missing)} posts…")
    session = requests.Session()
    session.headers.update(headers)
-    targets = discover_content_types(session)
+    targets = discover_content_types(session, wp_api)
    for type_name, rest_base, type_slug in targets:
        type_results = fetch_all_posts_for_type(
-            session, type_name, rest_base, type_slug)
+            session, wp_api, base_url, type_name, rest_base, type_slug
        )
        for url, title, description in type_results:
            if url in video_map:
                if not video_map[url].get("title"):
@@ -176,93 +279,90 @@ def fetch_metadata_from_api(video_map, urls, headers):
                if not video_map[url].get("description"):
                    video_map[url]["description"] = description
            else:
-                video_map[url] = {"title": title,
+                video_map[url] = {
-                                  "description": description, "videos": []}
+                    "title": title,
                    "description": description,
                    "videos": [],
                }
-    save_video_map(video_map)
+    save_video_map(video_map, site_key)
    populated_t = sum(1 for u in urls if video_map.get(u, {}).get("title"))
-    populated_d = sum(1 for u in urls if video_map.get(
+    populated_d = sum(1 for u in urls if video_map.get(u, {}).get("description"))
-        u, {}).get("description"))
+    print(f"[{site_key}] Titles populated: {populated_t}/{len(urls)}")
-    print(f"[+] Titles populated: {populated_t}/{len(urls)}")
+    print(f"[{site_key}] Descriptions populated: {populated_d}/{len(urls)}")
    print(f"[+] Descriptions populated: {populated_d}/{len(urls)}")
-def load_post_urls(headers):
+def load_post_urls(
-    vm = load_video_map()
+    site_key: str,
    base_url: str,
    wp_api: str,
    headers: dict[str, str],
 ) -> list[str]:
    vm = load_video_map(site_key)
    if vm:
-        print(f"[+] {VIDEO_MAP_FILE} found — loading {len(vm)} post URLs.")
+        print(f"[{site_key}] video_map found — loading {len(vm)} post URLs.")
        return list(vm.keys())
-    return fetch_post_urls_from_api(headers)
+    return fetch_post_urls_from_api(site_key, base_url, wp_api, headers)
-def html_to_text(html_str):
+def html_to_text(html_str: str) -> str:
    """Strip HTML tags, decode entities, and collapse whitespace into clean plain text."""
    import html
-    text = re.sub(r'<br\s*/?>', '\n', html_str)
+
-    text = text.replace('</p>', '\n\n')
+    text = re.sub(r"<br\s*/?>", "\n", html_str)
-    text = re.sub(r'<[^>]+>', '', text)
+    text = text.replace("</p>", "\n\n")
    text = re.sub(r"<[^>]+>", "", text)
    text = html.unescape(text)
    lines = [line.strip() for line in text.splitlines()]
-    text = '\n'.join(lines)
+    text = "\n".join(lines)
-    text = re.sub(r'\n{3,}', '\n\n', text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
-def extract_mp4_from_html(html):
+def extract_mp4_from_html(html: str) -> list[str]:
    candidates = re.findall(r'https?://[^\s"\'<>]+', html)
    return [u for u in candidates if _is_video_url(u)]
-def extract_title_from_html(html):
+def extract_title_from_html(html: str) -> str | None:
-    m = re.search(
+    m = re.search(r'<h1[^>]*class="entry-title"[^>]*>(.*?)</h1>', html, re.DOTALL)
        r'<h1[^>]*class="entry-title"[^>]*>(.*?)</h1>', html, re.DOTALL)
    if m:
-        title = re.sub(r'<[^>]+>', '', m.group(1)).strip()
+        title = re.sub(r"<[^>]+>", "", m.group(1)).strip()
        return title
-    m = re.search(r'<title>(.*?)(?:\s*[-–|].*)?</title>', html, re.DOTALL)
+    m = re.search(r"<title>(.*?)(?:\s*[-–|].*)?</title>", html, re.DOTALL)
    if m:
        return m.group(1).strip()
    return None
 def load_video_map():
    """Load VIDEO_MAP_FILE as JSON and return the parsed content.

    Returns an empty dict when the file does not exist, cannot be read,
    or does not contain valid JSON.
    """
    if not Path(VIDEO_MAP_FILE).exists():
        return {}
    try:
        with open(VIDEO_MAP_FILE, encoding="utf-8") as handle:
            parsed = json.load(handle)
    except (json.JSONDecodeError, OSError):
        # Unreadable or corrupt map is treated the same as a missing one.
        return {}
    return parsed
 def save_video_map(video_map):
    """Write `video_map` to VIDEO_MAP_FILE as indented UTF-8 JSON.

    The data is first written to a temporary ".tmp" file created in the same
    directory as VIDEO_MAP_FILE, which is then moved over the target. If
    anything fails, the temporary file is removed (best effort) and the
    original exception is re-raised.
    """
    target_dir = Path(VIDEO_MAP_FILE).resolve().parent
    handle, scratch_name = tempfile.mkstemp(dir=target_dir, suffix=".tmp")
    scratch = Path(scratch_name)
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            json.dump(video_map, out, indent=2, ensure_ascii=False)
        scratch.replace(VIDEO_MAP_FILE)
    except Exception:
        # Clean up the partial temp file, but never mask the original error.
        try:
            scratch.unlink()
        except OSError:
            pass
        raise
 def _expects_video(url):
    return "/pinkcuffs-videos/" in url
 MAX_RETRIES = 2
-async def worker(worker_id, queue, context, known,
+async def worker(
-                 total, retry_counts, video_map, map_lock, shutdown_event):
+    worker_id: int,
    queue: asyncio.Queue[tuple[int, str]],
    context: BrowserContext,
    known: set[str],
    total: int,
    retry_counts: dict[int, int],
    video_map: dict[str, Any],
    map_lock: asyncio.Lock,
    shutdown_event: asyncio.Event,
    reauth_lock: asyncio.Lock,
    reauth_done: list[bool],
    site_key: str,
    site_cfg: dict[str, str],
 ) -> None:
    base_url = site_cfg["base_url"]
    cookie_domain = urlparse(base_url).hostname or site_cfg["cookie_domain"]
    env_prefix = site_cfg["env_prefix"]
    page = await context.new_page()
-    video_hits = set()
+    video_hits: set[str] = set()
-    page.on("response", lambda resp: video_hits.add(resp.url) if _is_video_url(resp.url) else None)
+    page.on(
        "response",
        lambda resp: video_hits.add(resp.url) if _is_video_url(resp.url) else None,
    )
    try:
        while not shutdown_event.is_set():
@@ -279,19 +379,69 @@ async def worker(worker_id, queue, context, known,
                await page.goto(url, wait_until="networkidle", timeout=60000)
            except Exception as e:
                print(f"[W{worker_id}] Navigation error: {e}")
-                if _expects_video(url) and attempt < MAX_RETRIES:
+                if expects_video(url) and attempt < MAX_RETRIES:
                    retry_counts[idx] = attempt + 1
                    queue.put_nowait((idx, url))
                    print(f"[W{worker_id}] Re-queued for retry.")
-                elif not _expects_video(url):
+                elif not expects_video(url):
                    async with map_lock:
                        entry = video_map.get(url, {})
                        entry["scraped_at"] = int(time.time())
                        video_map[url] = entry
-                        save_video_map(video_map)
+                        save_video_map(video_map, site_key)
                else:
                    print(
-                        f"[W{worker_id}] Still failing after {MAX_RETRIES} retries — will retry next run.")
+                        f"[W{worker_id}] Still failing after {MAX_RETRIES} retries — will retry next run."
                    )
                continue
            if "NoDirectAccessAllowed" in page.url:
                recovered = False
                async with reauth_lock:
                    if not reauth_done[0]:
                        username = os.environ.get(f"{env_prefix}_USERNAME", "").strip()
                        password = os.environ.get(f"{env_prefix}_PASSWORD", "").strip()
                        if username and password:
                            print(f"[W{worker_id}] Cookie expired — re-authenticating…")
                            try:
                                new_name, new_value = await asyncio.to_thread(
                                    login_and_get_cookie, username, password, base_url
                                )
                                update_env(
                                    new_name,
                                    new_value,
                                    env_key=f"{env_prefix}_LOGIN_COOKIE",
                                )
                                await context.add_cookies(
                                    [
                                        {
                                            "name": new_name,
                                            "value": new_value,
                                            "domain": cookie_domain,
                                            "path": "/",
                                            "httpOnly": True,
                                            "secure": True,
                                            "sameSite": "None",
                                        }
                                    ]
                                )
                                reauth_done[0] = True
                                recovered = True
                                print(f"[W{worker_id}] Re-auth succeeded — re-queuing.")
                            except Exception as e:
                                print(f"[W{worker_id}] Re-auth failed: {e}")
                                shutdown_event.set()
                        else:
                            print(
                                f"[W{worker_id}] Cookie expired. "
                                f"Set {env_prefix}_USERNAME + {env_prefix}_PASSWORD "
                                "in .env for auto re-auth."
                            )
                            shutdown_event.set()
                    else:
                        recovered = True  # another worker already re-authed
                if recovered:
                    queue.put_nowait((idx, url))
                continue
            await asyncio.sleep(1.5)
@@ -301,9 +451,15 @@ async def worker(worker_id, queue, context, known,
            found = set(html_videos) | set(video_hits)
            video_hits.clear()
-            all_videos = [m for m in found if m not in (
+            all_videos = [
-                f"{BASE_URL}/wp-content/plugins/easy-video-player/lib/blank.mp4",
+                m
-            )]
+                for m in found
                if is_valid_url(m)
                and m
                not in (
                    f"{base_url}/wp-content/plugins/easy-video-player/lib/blank.mp4",
                )
            ]
            async with map_lock:
                new_found = found - known
@@ -312,7 +468,8 @@ async def worker(worker_id, queue, context, known,
                    known.update(new_found)
                elif all_videos:
                    print(
-                        f"[W{worker_id}] {len(all_videos)} video(s) already known — skipping write.")
+                        f"[W{worker_id}] {len(all_videos)} video(s) already known — skipping write."
                    )
                else:
                    print(f"[W{worker_id}] No video found on page.")
@@ -322,51 +479,52 @@ async def worker(worker_id, queue, context, known,
                existing_videos = set(entry.get("videos", []))
                existing_videos.update(all_videos)
                entry["videos"] = sorted(existing_videos)
-                mark_done = bool(all_videos) or not _expects_video(url)
+                mark_done = bool(all_videos) or not expects_video(url)
                if mark_done:
                    entry["scraped_at"] = int(time.time())
                video_map[url] = entry
-                save_video_map(video_map)
+                save_video_map(video_map, site_key)
            if not mark_done:
                if attempt < MAX_RETRIES:
                    retry_counts[idx] = attempt + 1
                    queue.put_nowait((idx, url))
                    print(
-                        f"[W{worker_id}] Re-queued for retry ({attempt + 1}/{MAX_RETRIES}).")
+                        f"[W{worker_id}] Re-queued for retry ({attempt + 1}/{MAX_RETRIES})."
                    )
                else:
                    print(
-                        f"[W{worker_id}] No video after {MAX_RETRIES} retries — will retry next run.")
+                        f"[W{worker_id}] No video after {MAX_RETRIES} retries — will retry next run."
                    )
    finally:
        await page.close()
-async def run():
+async def run_for_site(
-    shutdown_event = asyncio.Event()
+    site_key: str,
-    loop = asyncio.get_running_loop()
+    site_cfg: dict[str, str],
    shutdown_event: asyncio.Event,
 ) -> None:
    base_url = site_cfg["base_url"]
    cookie_domain = urlparse(base_url).hostname or site_cfg["cookie_domain"]
    wp_api = f"{base_url}/wp-json/wp/v2"
-    def _handle_shutdown(signum, _frame):
+    cookie_name, cookie_value = _get_login_cookie(site_key, site_cfg)
-        print(f"\n[!] Signal {signum} received — finishing active pages then exiting…")
+    req_headers = _api_headers(base_url, cookie_name, cookie_value)
        loop.call_soon_threadsafe(shutdown_event.set)
-    signal.signal(signal.SIGINT, _handle_shutdown)
+    urls = load_post_urls(site_key, base_url, wp_api, req_headers)
    signal.signal(signal.SIGTERM, _handle_shutdown)
-    try:
+    video_map = load_video_map(site_key)
-        cookie_name, cookie_value = _get_login_cookie()
+    if any(
-        req_headers = {
+        u not in video_map
            **API_HEADERS,
            "Cookie": f"{cookie_name}={cookie_value}; eav-age-verified=1",
        }
        urls = load_post_urls(req_headers)
        video_map = load_video_map()
        if any(u not in video_map
        or not video_map[u].get("title")
        or not video_map[u].get("description")
-               for u in urls if _expects_video(u)):
+        for u in urls
-            fetch_metadata_from_api(video_map, urls, req_headers)
+        if expects_video(u)
    ):
        fetch_metadata_from_api(
            site_key, base_url, wp_api, video_map, urls, req_headers
        )
    known = {u for entry in video_map.values() for u in entry.get("videos", [])}
@@ -377,62 +535,79 @@ async def run():
        entry = video_map.get(u, {})
        if not entry.get("scraped_at"):
            pending.append((i, u))
-            elif _expects_video(u) and not entry.get("videos"):
+        elif expects_video(u) and not entry.get("videos"):
            pending.append((i, u))
            needs_map += 1
    done_count = sum(1 for v in video_map.values() if v.get("scraped_at"))
-        print(f"[+] Loaded {total} post URLs.")
+    print(f"[{site_key}] Loaded {total} post URLs.")
-        print(f"[+] Already have {len(known)} video URLs mapped.")
+    print(f"[{site_key}] Already have {len(known)} video URLs mapped.")
-        print(f"[+] Video map: {len(video_map)} entries in {VIDEO_MAP_FILE}")
+    print(f"[{site_key}] Video map: {len(video_map)} entries in video_map.json")
    if done_count:
        remaining_new = len(pending) - needs_map
        print(
-                f"[↻] Resuming: {done_count} done, {remaining_new} new + {needs_map} needing map data.")
+            f"[{site_key}] Resuming: {done_count} done, "
            f"{remaining_new} new + {needs_map} needing map data."
        )
    if not pending:
-            print("[✓] All URLs already processed and mapped.")
+        print(f"[{site_key}] All URLs already processed and mapped.")
        return
    print(
-            f"[⚡] Running with {min(MAX_WORKERS, len(pending))} concurrent workers.\n")
+        f"[{site_key}] Running with {min(MAX_WORKERS, len(pending))} concurrent workers.\n"
    )
-        queue = asyncio.Queue()
+    queue: asyncio.Queue[tuple[int, str]] = asyncio.Queue()
    for item in pending:
        queue.put_nowait(item)
    map_lock = asyncio.Lock()
-        retry_counts = {}
+    reauth_lock = asyncio.Lock()
    reauth_done: list[bool] = [False]
    retry_counts: dict[int, int] = {}
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        context = await browser.new_context()
            _cookie_domain = urlparse(BASE_URL).netloc
        site_cookies = [
            {
                "name": cookie_name,
                "value": cookie_value,
-                    "domain": _cookie_domain,
+                "domain": cookie_domain,
                "path": "/",
                "httpOnly": True,
                "secure": True,
-                    "sameSite": "None"
+                "sameSite": "None",
            },
            {
                "name": "eav-age-verified",
                "value": "1",
-                    "domain": _cookie_domain,
+                "domain": cookie_domain,
-                    "path": "/"
+                "path": "/",
-                }
+            },
        ]
-            await context.add_cookies(site_cookies)
+        await context.add_cookies(site_cookies)  # type: ignore[arg-type]
        num_workers = min(MAX_WORKERS, len(pending))
        workers = [
            asyncio.create_task(
-                    worker(i, queue, context, known,
+                worker(
-                           total, retry_counts, video_map, map_lock, shutdown_event)
+                    i,
                    queue,
                    context,
                    known,
                    total,
                    retry_counts,
                    video_map,
                    map_lock,
                    shutdown_event,
                    reauth_lock,
                    reauth_done,
                    site_key,
                    site_cfg,
                )
            )
            for i in range(num_workers)
        ]
@@ -442,21 +617,64 @@ async def run():
    mapped = sum(1 for v in video_map.values() if v.get("videos"))
    print(
-            f"\n[+] Video map: {mapped} posts with videos, {len(video_map)} total entries.")
+        f"\n[{site_key}] Video map: {mapped} posts with videos, {len(video_map)} total entries."
    )
    if not shutdown_event.is_set():
-            print(f"[✓] Completed. Full map in {VIDEO_MAP_FILE}")
+        print(f"[{site_key}] Completed. Full map in video_map.json")
    else:
        done = sum(1 for v in video_map.values() if v.get("scraped_at"))
-            print(f"[⏸] Paused — {done}/{total} done. Run again to resume.")
+        print(f"[{site_key}] Paused — {done}/{total} done. Run again to resume.")
 async def run(selected_sites: list[str], explicit: bool) -> None:
    """Scrape each selected site in turn, stopping early on SIGINT/SIGTERM.

    Args:
        selected_sites: Keys into ``SITES`` to process, in the given order.
        explicit: True when the caller named the sites itself. A site without
            credentials then raises instead of being skipped.

    Raises:
        RuntimeError: ``explicit`` is true and a selected site has neither
            credentials nor a cookie configured.
    """
    shutdown_event = asyncio.Event()
    loop = asyncio.get_running_loop()

    def _handle_shutdown(signum: int, _: object) -> None:
        print(f"\n[!] Signal {signum} received — finishing active pages then exiting…")
        # Schedule the set() on the loop instead of calling it directly from
        # inside the signal handler.
        loop.call_soon_threadsafe(shutdown_event.set)

    # signal.signal() returns the handler it replaces. Keep those so the
    # caller's handlers (notably Python's default SIGINT handler, which raises
    # KeyboardInterrupt) come back afterwards instead of being forced to SIG_DFL.
    previous_handlers = {
        sig: signal.signal(sig, _handle_shutdown)
        for sig in (signal.SIGINT, signal.SIGTERM)
    }
    try:
        for site_key in selected_sites:
            if shutdown_event.is_set():
                break
            site_cfg = SITES[site_key]
            if not _has_credentials(site_cfg):
                if explicit:
                    raise RuntimeError(
                        f"No credentials or cookie found for {site_key}. See .env.example."
                    )
                print(f"[{site_key}] No credentials found — skipping.")
                continue
            print(f"\n{'=' * 60}")
            print(f"  Site: {site_key}  ({site_cfg['base_url']})")
            print(f"{'=' * 60}\n")
            await run_for_site(site_key, site_cfg, shutdown_event)
    finally:
        for sig, previous in previous_handlers.items():
            # A None result means the old handler was not installed from
            # Python and cannot be passed back; fall back to the OS default.
            signal.signal(sig, previous if previous is not None else signal.SIG_DFL)
-def main():
+def main() -> None:
    parser = argparse.ArgumentParser(description="Scrape video URLs from member sites")
    parser.add_argument(
        "--site",
        action="append",
        choices=list(SITES.keys()),
        dest="sites",
        metavar="SITE",
        help=f"Site(s) to scrape (default: all). Can be repeated. Choices: {', '.join(SITES)}",
    )
    args = parser.parse_args()
    explicit = bool(args.sites)
    selected = args.sites or list(SITES.keys())
    try:
-        asyncio.run(run())
+        asyncio.run(run(selected, explicit))
    except KeyboardInterrupt:
        print("\n[!] Interrupted. Run again to resume.")
    except RuntimeError as e:
@@ -1,4 +1,3 @@
 playwright==1.58.0
 python-dotenv==1.2.1
 Requests==2.32.5
 rookiepy==0.5.6
@@ -4,16 +4,41 @@ Importable function:
    summarize_sizes(sizes) - return dict with total, smallest, largest, average, failed
 """
-from check_clashes import fmt_size, fetch_sizes, load_video_map, VIDEO_MAP_FILE
+from typing import Optional, TypedDict
 from check_clashes import (
    fmt_size,
    fetch_sizes,
    load_video_map,
    build_url_referers,
    VIDEO_MAP_FILE,
 )
-def summarize_sizes(sizes):
+class SizeStats(TypedDict):
    sized: int
    total: int
    total_bytes: int
    smallest: int
    largest: int
    average: int
    failed: list[str]
 def summarize_sizes(sizes: dict[str, Optional[int]]) -> SizeStats:
    """Given {url: size_or_None}, return a stats dict."""
    known = {u: s for u, s in sizes.items() if s is not None}
    failed = [u for u, s in sizes.items() if s is None]
    if not known:
-        return {"sized": 0, "total": len(sizes), "total_bytes": 0,
+        return {
-                "smallest": 0, "largest": 0, "average": 0, "failed": failed}
+            "sized": 0,
            "total": len(sizes),
            "total_bytes": 0,
            "smallest": 0,
            "largest": 0,
            "average": 0,
            "failed": failed,
        }
    total_bytes = sum(known.values())
    return {
        "sized": len(known),
@@ -28,19 +53,28 @@ def summarize_sizes(sizes):
 # --------------- CLI ---------------
-def _progress(done, total):
+
 def _progress(done: int, total: int) -> None:
    """Print a ``done/total`` line on every 200th item and on the last one."""
    at_milestone = done % 200 == 0
    at_end = done == total
    if not (at_milestone or at_end):
        return
    print(f"    {done}/{total}")
-def main():
+def main() -> None:
    vm = load_video_map()
-    urls = [u for entry in vm.values() for u in entry.get("videos", []) if u.startswith("http")]
+    urls: list[str] = [
        u
        for entry in vm.values()
        for u in entry.get("videos", [])
        if u.startswith("http")
    ]
    url_referers = build_url_referers(vm)
    print(f"[+] {len(urls)} URLs in {VIDEO_MAP_FILE}")
    print("[+] Fetching file sizes (20 threads)…\n")
-    sizes = fetch_sizes(urls, workers=20, on_progress=_progress)
+    sizes = fetch_sizes(
        urls, workers=20, on_progress=_progress, url_referers=url_referers
    )
    stats = summarize_sizes(sizes)
    print(f"\n{'=' * 45}")
@@ -26,13 +26,14 @@ from pathlib import Path
 import re
 import sys
 import time
 from typing import Any, cast
 import requests
 from dotenv import load_dotenv
-from check_clashes import fmt_size, url_to_filename, VIDEO_EXTS
+from check_clashes import fmt_size, url_to_filename, VIDEO_EXTS, load_video_map
 from config import SITES
 from download import (
    load_video_map,
    collect_urls,
    get_paths_for_mode,
    read_mode,
@@ -52,21 +53,21 @@ PT_NAME_MAX = 120
 # ── Text helpers ─────────────────────────────────────────────────────
-def clean_description(raw):
+
 def clean_description(raw: str) -> str:
    """Strip WordPress shortcodes and HTML from a description."""
    if not raw:
        return ""
-    text = re.sub(r'\[/?[^\]]+\]', '', raw)
+    text = re.sub(r"\[/?[^\]]+\]", "", raw)
-    text = re.sub(r'<[^>]+>', '', text)
+    text = re.sub(r"<[^>]+>", "", text)
    text = html.unescape(text)
-    text = re.sub(r'\n{3,}', '\n\n', text).strip()
+    text = re.sub(r"\n{3,}", "\n\n", text).strip()
    return text[:10000]
-def make_pt_name(title, fallback_filename):
+def make_pt_name(title: str, fallback_filename: str) -> str:
    """Build a PeerTube-safe video name (3-120 chars)."""
-    name = html.unescape(title).strip(
+    name = html.unescape(title).strip() if title else Path(fallback_filename).stem
    ) if title else Path(fallback_filename).stem
    if len(name) > PT_NAME_MAX:
        name = name[: PT_NAME_MAX - 1].rstrip() + "\u2026"
    while len(name) < 3:
@@ -76,7 +77,8 @@ def make_pt_name(title, fallback_filename):
 # ── PeerTube API ─────────────────────────────────────────────────────
-def get_oauth_token(base, username, password):
+
 def get_oauth_token(base: str, username: str, password: str) -> str:
    r = requests.get(f"{base}/api/v1/oauth-clients/local", timeout=15)
    r.raise_for_status()
    client = r.json()
@@ -93,26 +95,36 @@ def get_oauth_token(base, username, password):
        timeout=15,
    )
    r.raise_for_status()
-    return r.json()["access_token"]
+    data_any: Any = r.json()
    data = cast(dict[str, Any], data_any)
    token = data.get("access_token")
    if not isinstance(token, str) or not token:
        raise RuntimeError("PeerTube token response missing access_token")
    return token
-def api_headers(token):
+def api_headers(token: str) -> dict[str, str]:
    return {"Authorization": f"Bearer {token}"}
-def get_channel_id(base, token, channel_name):
+def get_channel_id(base: str, token: str, channel_name: str) -> int:
    r = requests.get(
        f"{base}/api/v1/video-channels/{channel_name}",
        headers=api_headers(token),
        timeout=15,
    )
    r.raise_for_status()
-    return r.json()["id"]
+    data_any: Any = r.json()
    data = cast(dict[str, Any], data_any)
    cid = data.get("id")
    if not isinstance(cid, int):
        raise RuntimeError("PeerTube channel response missing id")
    return cid
-def get_channel_video_names(base, token, channel_name):
+def get_channel_video_names(base: str, token: str, channel_name: str) -> Counter[str]:
    """Paginate through the channel and return a Counter of video names."""
-    counts = Counter()
+    counts: Counter[str] = Counter()
    start = 0
    while True:
        r = requests.get(
@@ -135,8 +147,16 @@ CHUNK_SIZE = 10 * 1024 * 1024  # 10 MB
 MAX_RETRIES = 5
-def _init_resumable(base, token, channel_id, filepath, filename, name,
+def _init_resumable(
-                    description="", nsfw=False):
+    base: str,
    token: str,
    channel_id: int,
    filepath: Path,
    filename: str,
    name: str,
    description: str = "",
    nsfw: bool = False,
 ) -> tuple[str, int]:
    """POST to create a resumable upload session.  Returns upload URL."""
    file_size = Path(filepath).stat().st_size
    metadata = {
@@ -171,7 +191,7 @@ def _init_resumable(base, token, channel_id, filepath, filename, name,
    return location, file_size
-def _query_offset(upload_url, token, file_size):
+def _query_offset(upload_url: str, token: str, file_size: int) -> int:
    """Ask the server how many bytes it has received so far."""
    r = requests.put(
        upload_url,
@@ -193,8 +213,15 @@ def _query_offset(upload_url, token, file_size):
    return 0
-def upload_video(base, token, channel_id, filepath, name,
+def upload_video(
-                 description="", nsfw=False):
+    base: str,
    token: str,
    channel_id: int,
    filepath: Path,
    name: str,
    description: str = "",
    nsfw: bool = False,
 ) -> tuple[bool, str | None]:
    """Resumable chunked upload.  Returns (ok, uuid)."""
    filepath = Path(filepath)
    filename = filepath.name
@@ -202,8 +229,14 @@ def upload_video(base, token, channel_id, filepath, name,
    try:
        upload_url, _ = _init_resumable(
-            base, token, channel_id, filepath, filename,
+            base,
-            name, description, nsfw,
+            token,
            channel_id,
            filepath,
            filename,
            name,
            description,
            nsfw,
        )
    except Exception as e:
        print(f"    Init failed: {e}")
@@ -221,8 +254,11 @@ def upload_video(base, token, channel_id, filepath, name,
            chunk = f.read(chunk_len)
            pct = int(100 * (end + 1) / file_size)
-            print(f"    {fmt_size(offset)}/{fmt_size(file_size)}  ({pct}%)",
+            print(
-                  end="\r", flush=True)
+                f"    {fmt_size(offset)}/{fmt_size(file_size)}  ({pct}%)",
                end="\r",
                flush=True,
            )
            try:
                r = requests.put(
@@ -239,12 +275,13 @@ def upload_video(base, token, channel_id, filepath, name,
            except (requests.ConnectionError, requests.Timeout) as e:
                retries += 1
                if retries > MAX_RETRIES:
-                    print(
+                    print(f"\n    Upload failed after {MAX_RETRIES} retries: {e}")
                        f"\n    Upload failed after {MAX_RETRIES} retries: {e}")
                    return False, None
                wait = min(2**retries, 60)
-                print(f"\n    Connection error, retry {retries}/{MAX_RETRIES} "
+                print(
-                      f"in {wait}s ...")
+                    f"\n    Connection error, retry {retries}/{MAX_RETRIES} "
                    f"in {wait}s ..."
                )
                time.sleep(wait)
                try:
                    offset = _query_offset(upload_url, token, file_size)
@@ -261,8 +298,7 @@ def upload_video(base, token, channel_id, filepath, name,
                retries = 0
            elif r.status_code == 200:
-                print(
+                print(f"    {fmt_size(file_size)}/{fmt_size(file_size)}  (100%)")
                    f"    {fmt_size(file_size)}/{fmt_size(file_size)}  (100%)")
                uuid = r.json().get("video", {}).get("uuid")
                return True, uuid
@@ -270,11 +306,9 @@ def upload_video(base, token, channel_id, filepath, name,
                retry_after = int(r.headers.get("Retry-After", 10))
                retries += 1
                if retries > MAX_RETRIES:
-                    print(
+                    print(f"\n    Upload failed: server returned {r.status_code}")
                        f"\n    Upload failed: server returned {r.status_code}")
                    return False, None
-                print(
+                print(f"\n    Server {r.status_code}, retry in {retry_after}s ...")
                    f"\n    Server {r.status_code}, retry in {retry_after}s ...")
                time.sleep(retry_after)
                try:
                    offset = _query_offset(upload_url, token, file_size)
@@ -301,7 +335,7 @@ _STATE = {
 }
-def get_video_state(base, token, uuid):
+def get_video_state(base: str, token: str, uuid: str) -> tuple[int, str]:
    r = requests.get(
        f"{base}/api/v1/videos/{uuid}",
        headers=api_headers(token),
@@ -312,7 +346,7 @@ def get_video_state(base, token, uuid):
    return state["id"], state.get("label", "")
-def wait_for_published(base, token, uuid, poll_interval):
+def wait_for_published(base: str, token: str, uuid: str, poll_interval: int) -> int:
    """Block until the video reaches state 1 (Published) or a failure state."""
    started = time.monotonic()
    while True:
@@ -329,8 +363,10 @@ def wait_for_published(base, token, uuid, poll_interval):
        try:
            sid, label = get_video_state(base, token, uuid)
        except requests.exceptions.RequestException as e:
-            print(f"    -> Poll error ({e.__class__.__name__}) "
+            print(
-                  f"after {elapsed_str}, retrying in {poll_interval}s …")
+                f"    -> Poll error ({e.__class__.__name__}) "
                f"after {elapsed_str}, retrying in {poll_interval}s …"
            )
            time.sleep(poll_interval)
            continue
@@ -343,13 +379,16 @@ def wait_for_published(base, token, uuid, poll_interval):
            print(f"    -> FAILED: {display}")
            return sid
-        print(f"    -> {display} … {elapsed_str} elapsed (next check in {poll_interval}s)")
+        print(
            f"    -> {display} … {elapsed_str} elapsed (next check in {poll_interval}s)"
        )
        time.sleep(poll_interval)
 # ── State tracker ────────────────────────────────────────────────────
-def load_uploaded(input_dir):
+
 def load_uploaded(input_dir: str) -> set[Path]:
    path = Path(input_dir) / UPLOADED_FILE
    if not path.exists():
        return set()
@@ -357,36 +396,60 @@ def load_uploaded(input_dir):
        return {Path(line.strip()) for line in f if line.strip()}
-def mark_uploaded(input_dir, rel_path):
+def mark_uploaded(input_dir: str, rel_path: Path) -> None:
    with open(Path(input_dir) / UPLOADED_FILE, "a") as f:
        f.write(f"{rel_path}\n")
 # ── File / metadata helpers ─────────────────────────────────────────
-def build_path_to_meta(video_map, input_dir):
+
-    """Map each expected download path (relative) to {title, description}."""
+def build_path_to_meta(
    video_map: dict[str, Any],
    input_dir: str,
 ) -> dict[Path, dict[str, str]]:
    """Map each expected download path (relative) to {title, description, original_filename}."""
    urls = collect_urls(video_map)
    mode = read_mode(input_dir) or MODE_ORIGINAL
    paths = get_paths_for_mode(mode, urls, video_map, input_dir)
-    url_meta = {}
+    url_to_site: dict[str, str] = {}
-    for entry in video_map.values():
+    for site_key in SITES:
-        t = entry.get("title", "")
+        for entry in load_video_map(site_key).values():
-        d = entry.get("description", "")
+            for vid_url in entry.get("videos", []):
-        for video_url in entry.get("videos", []):
+                url_to_site[vid_url] = site_key
            if video_url not in url_meta:
                url_meta[video_url] = {"title": t, "description": d}
-    result = {}
+    paths = get_paths_for_mode(mode, urls, video_map, input_dir, url_to_site)
    url_meta: dict[str, dict[str, str]] = {}
    for entry_any in video_map.values():
        entry = cast(dict[str, Any], entry_any)
        t = entry.get("title")
        d = entry.get("description")
        title = t if isinstance(t, str) else ""
        desc = d if isinstance(d, str) else ""
        videos_any = entry.get("videos", [])
        if isinstance(videos_any, list):
            for video_url_any in videos_any:
                if not isinstance(video_url_any, str):
                    continue
                if video_url_any not in url_meta:
                    url_meta[video_url_any] = {"title": title, "description": desc}
    result: dict[Path, dict[str, str]] = {}
    for url, abs_path in paths.items():
-        rel = Path(abs_path).relative_to(input_dir)
+        rel = abs_path.relative_to(input_dir)
        meta = url_meta.get(url, {"title": "", "description": ""})
-        result[rel] = {**meta, "original_filename": url_to_filename(url)}
+        result[rel] = {
            "title": meta.get("title", ""),
            "description": meta.get("description", ""),
            "original_filename": url_to_filename(url),
        }
    return result
-def find_videos(input_dir):
+def find_videos(input_dir: str) -> set[Path]:
    """Walk input_dir and return a set of relative paths for all video files."""
    found = set()
    for root, dirs, files in os.walk(input_dir):
@@ -399,7 +462,12 @@ def find_videos(input_dir):
 # ── Channel match helpers ─────────────────────────────────────────────
-def _channel_match(rel, path_meta, existing):
+
 def _channel_match(
    rel: Path,
    path_meta: dict[Path, dict[str, str]],
    existing: set[str],
 ) -> tuple[bool, str]:
    """Return (matched, name) for a local file against the channel name set.
    Checks both the title-derived name and the original-filename-derived name
@@ -409,38 +477,62 @@ def _channel_match(rel, path_meta, existing):
    """
    meta = path_meta.get(rel, {})
    name = make_pt_name(meta.get("title", ""), rel.name)
    orig_fn = meta.get("original_filename", "")
-    raw_name = make_pt_name("", orig_fn) if orig_fn else None
+    raw_name: str | None = make_pt_name("", orig_fn) if orig_fn else None
-    matched = name in existing or (raw_name and raw_name != name and raw_name in existing)
+
    matched = name in existing
    if not matched and raw_name is not None and raw_name != name:
        matched = raw_name in existing
    return matched, name
 # ── CLI ──────────────────────────────────────────────────────────────
-def main():
+
 def main() -> None:
    ap = argparse.ArgumentParser(
        description="Upload videos to PeerTube with transcoding-aware batching",
    )
-    ap.add_argument("--input", "-i", default=DEFAULT_OUTPUT,
+    ap.add_argument(
-                    help=f"Directory with downloaded videos (default: {DEFAULT_OUTPUT})")
+        "--input",
-    ap.add_argument("--url",
+        "-i",
-                    help="PeerTube instance URL (or set PEERTUBE_URL env var)")
+        default=DEFAULT_OUTPUT,
-    ap.add_argument("--username", "-U",
+        help=f"Directory with downloaded videos (default: {DEFAULT_OUTPUT})",
-                    help="PeerTube username (or set PEERTUBE_USER env var)")
+    )
-    ap.add_argument("--password", "-p",
+    ap.add_argument("--url", help="PeerTube instance URL (or set PEERTUBE_URL env var)")
-                    help="PeerTube password (or set PEERTUBE_PASSWORD env var)")
+    ap.add_argument(
-    ap.add_argument("--channel", "-C",
+        "--username", "-U", help="PeerTube username (or set PEERTUBE_USER env var)"
-                    help="Channel to upload to (or set PEERTUBE_CHANNEL env var)")
+    )
-    ap.add_argument("--batch-size", "-b", type=int, default=DEFAULT_BATCH_SIZE,
+    ap.add_argument(
-                    help="Videos to upload before waiting for transcoding (default: 1)")
+        "--password", "-p", help="PeerTube password (or set PEERTUBE_PASSWORD env var)"
-    ap.add_argument("--poll-interval", type=int, default=DEFAULT_POLL,
+    )
-                    help=f"Seconds between state polls (default: {DEFAULT_POLL})")
+    ap.add_argument(
-    ap.add_argument("--skip-wait", action="store_true",
+        "--channel", "-C", help="Channel to upload to (or set PEERTUBE_CHANNEL env var)"
-                    help="Upload everything without waiting for transcoding")
+    )
-    ap.add_argument("--nsfw", action="store_true",
+    ap.add_argument(
-                    help="Mark videos as NSFW")
+        "--batch-size",
-    ap.add_argument("--dry-run", "-n", action="store_true",
+        "-b",
-                    help="Preview what would be uploaded")
+        type=int,
        default=DEFAULT_BATCH_SIZE,
        help="Videos to upload before waiting for transcoding (default: 1)",
    )
    ap.add_argument(
        "--poll-interval",
        type=int,
        default=DEFAULT_POLL,
        help=f"Seconds between state polls (default: {DEFAULT_POLL})",
    )
    ap.add_argument(
        "--skip-wait",
        action="store_true",
        help="Upload everything without waiting for transcoding",
    )
    ap.add_argument("--nsfw", action="store_true", help="Mark videos as NSFW")
    ap.add_argument(
        "--dry-run", "-n", action="store_true", help="Preview what would be uploaded"
    )
    args = ap.parse_args()
    url = args.url or os.environ.get("PEERTUBE_URL")
@@ -449,12 +541,16 @@ def main():
    password = args.password or os.environ.get("PEERTUBE_PASSWORD")
    if not args.dry_run:
-        missing = [label for label, val in [
+        missing = [
            label
            for label, val in [
                ("--url / PEERTUBE_URL", url),
                ("--username / PEERTUBE_USER", username),
                ("--channel / PEERTUBE_CHANNEL", channel),
                ("--password / PEERTUBE_PASSWORD", password),
-        ] if not val]
+            ]
            if not val
        ]
        if missing:
            for label in missing:
                print(f"[!] Required: {label}")
@@ -468,7 +564,8 @@ def main():
    unmatched = on_disk - set(path_meta.keys())
    if unmatched:
        print(
-            f"[!] {len(unmatched)} file(s) on disk not in video_map (will use filename as title)")
+            f"[!] {len(unmatched)} file(s) on disk not in video_map (will use filename as title)"
        )
        for rel in unmatched:
            path_meta[rel] = {"title": "", "description": ""}
@@ -493,10 +590,14 @@ def main():
            sz = (Path(args.input) / rel).stat().st_size
            total_bytes += sz
            print(f"  [{fmt_size(sz):>10}]  {name}")
-        print(
+        print(f"\n  Total: {fmt_size(total_bytes)} across {len(pending)} videos")
            f"\n  Total: {fmt_size(total_bytes)} across {len(pending)} videos")
        return
    assert url is not None
    assert username is not None
    assert channel is not None
    assert password is not None
    # ── authenticate ──
    base = url.rstrip("/")
    if not base.startswith("http"):
@@ -533,7 +634,9 @@ def main():
        if _channel_match(rel, path_meta, existing)[0]:
            pre_matched.append(rel)
    if pre_matched:
-        print(f"\n[+] Pre-sweep: {len(pre_matched)} local file(s) already on channel — marking uploaded")
+        print(
            f"\n[+] Pre-sweep: {len(pre_matched)} local file(s) already on channel — marking uploaded"
        )
        for rel in pre_matched:
            mark_uploaded(args.input, rel)
        pending = [rel for rel in pending if rel not in set(pre_matched)]
@@ -548,7 +651,8 @@ def main():
            # ── flush batch if full ──
            if not args.skip_wait and len(batch) >= args.batch_size:
                print(
-                    f"\n[+] Waiting for {len(batch)} video(s) to finish processing ...")
+                    f"\n[+] Waiting for {len(batch)} video(s) to finish processing ..."
                )
                for uuid, bname in batch:
                    print(f"\n  [{bname}]")
                    wait_for_published(base, token, uuid, args.poll_interval)
@@ -568,18 +672,19 @@ def main():
            print(f"\n[{total_up + 1}/{len(pending)}] {name}")
            print(f"    File: {rel}  ({fmt_size(sz)})")
-            ok, uuid = upload_video(
+            ok, uuid_opt = upload_video(
-                base, token, channel_id, filepath, name, desc, nsfw)
+                base, token, channel_id, filepath, name, desc, nsfw
            )
            if not ok:
                continue
-            print(f"    Uploaded  uuid={uuid}")
+            print(f"    Uploaded  uuid={uuid_opt}")
            mark_uploaded(args.input, rel)
            total_up += 1
            existing.add(name)
-            if uuid:
+            if uuid_opt is not None:
-                batch.append((uuid, name))
+                batch.append((uuid_opt, name))
        # ── wait for final batch ──
        if batch and not args.skip_wait:
@@ -589,8 +694,7 @@ def main():
                wait_for_published(base, token, uuid, args.poll_interval)
    except KeyboardInterrupt:
-        print(
+        print(f"\n\n[!] Interrupted after {total_up} uploads. Re-run to continue.")
            f"\n\n[!] Interrupted after {total_up} uploads. Re-run to continue.")
        sys.exit(130)
    print(f"\n{'=' * 50}")