Cache sizes

site
2026-06-30 00:27:13 +00:00 · 2026-05-10 18:59:07 +02:00 · 2026-05-10 18:44:41 +02:00
8 changed files with 3073 additions and 45 deletions
@@ -1,14 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(jq -r \".\"\"/api/v1/video-channels/{channelHandle}/videos\"\".get.parameters[] | $ref\")",
-      "Bash(grep '$ref')",
-      "Bash(grep -c \"^    ''/\" /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.yaml)",
-      "Bash(grep -c '    \"\"/' /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.json)",
-      "Bash(grep -c \"^  ''/\" /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.yaml)",
-      "Bash(git --no-pager diff HEAD~1 docs/external/peertube/openapi.json)",
-      "Bash(git --no-pager show HEAD~1:docs/external/peertube/openapi.json)",
-      "Bash(git --no-pager diff --name-only HEAD~1)"
-    ]
-  }
-}
@@ -13,6 +13,11 @@ PINKCUFFS_USERNAME=your-email-or-username
 PINKCUFFS_PASSWORD=your-password
 PINKCUFFS_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>

+# femuniverse.com credentials (separate membership)
+FEMUNIVERSE_USERNAME=your-email-or-username
+FEMUNIVERSE_PASSWORD=your-password
+FEMUNIVERSE_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
+
 # PeerTube upload target
 PEERTUBE_URL=https://your-peertube-instance.example
 PEERTUBE_USER=admin
@@ -38,6 +38,8 @@ jobs:
          JAILBIRDZ_PASSWORD: ${{ secrets.JAILBIRDZ_PASSWORD }}
          PINKCUFFS_USERNAME: ${{ secrets.PINKCUFFS_USERNAME }}
          PINKCUFFS_PASSWORD: ${{ secrets.PINKCUFFS_PASSWORD }}
+          FEMUNIVERSE_USERNAME: ${{ secrets.FEMUNIVERSE_USERNAME }}
+          FEMUNIVERSE_PASSWORD: ${{ secrets.FEMUNIVERSE_PASSWORD }}

      - name: Commit updated video_map.json
        if: always()  # save progress even if main.py crashed or timed out
@@ -1,3 +1,6 @@
+# Local Claude instructions
+.claude/settings.local.json
+
 # Temporary cache
 __pycache__/
 .ruff_cache/
@@ -1,6 +1,6 @@
 # 𝒥𝒶𝒾𝓁𝒷𝒾𝓇𝒹𝓏-𝒹𝓁

-Jailbirdz.com and Pinkcuffs.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of one or both sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance.
+Jailbirdz.com, Pinkcuffs.com, and Femuniverse.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of any combination of these sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance.

 > [!NOTE]  
 > This tool does not bypass authentication, modify the site, or intercept anything it isn't entitled to. A valid, paid membership is required. The scraper authenticates using your own session cookie and accesses only content your account can already view in a browser.
@@ -23,9 +23,9 @@ cp .env.example .env

 Set credentials for whichever sites you have a membership on. You don't need all of them.

-**Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` equivalents) in `.env`. `main.py` logs in automatically on startup.
+**Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` / `FEMUNIVERSE_*` equivalents) in `.env`. `main.py` logs in automatically on startup.

-**Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie.
+**Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE` / `FEMUNIVERSE_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie.

 Sites with no credentials are skipped automatically when running `python main.py`.

@@ -35,6 +35,8 @@ Sites with no credentials are skipped automatically when running `python main.py
 - `JAILBIRDZ_LOGIN_COOKIE` — jailbirdz.com session cookie (fallback).
 - `PINKCUFFS_USERNAME` / `PINKCUFFS_PASSWORD` — pinkcuffs.com login.
 - `PINKCUFFS_LOGIN_COOKIE` — pinkcuffs.com session cookie (fallback).
+- `FEMUNIVERSE_USERNAME` / `FEMUNIVERSE_PASSWORD` — femuniverse.com login.
+- `FEMUNIVERSE_LOGIN_COOKIE` — femuniverse.com session cookie (fallback).
 - `PEERTUBE_URL` — base URL of your PeerTube instance.
 - `PEERTUBE_USER` — PeerTube username.
 - `PEERTUBE_CHANNEL` — channel to upload to.
@@ -50,6 +52,7 @@ Discovers all post URLs via the WordPress REST API, then visits each page with a
 python main.py                    # scrape all sites you have credentials for
 python main.py --site jailbirdz   # scrape one site only
 python main.py --site pinkcuffs --site jailbirdz  # explicit multi-site
+python main.py --site femuniverse                 # femuniverse only
 ```

 Results are written to `video_map.json`. Safe to re-run — already-scraped posts are skipped.
@@ -66,7 +69,7 @@ Options:
      --reorganize      Rename existing files to match current naming mode
  -w, --workers N       Concurrent downloads (default: 4)
  -n, --dry-run         Print what would be downloaded
-      --site SITE       Limit to one site (jailbirdz or pinkcuffs); repeatable
+      --site SITE       Limit to one site (jailbirdz, pinkcuffs, or femuniverse); repeatable
 ```

 Resumes partial downloads. The chosen naming mode is saved to `.naming_mode` inside the output directory and persists across runs. Filenames that would clash are placed into subfolders.
@@ -105,6 +108,10 @@ gh secret set JAILBIRDZ_PASSWORD
 # pinkcuffs (if you have a membership)
 gh secret set PINKCUFFS_USERNAME
 gh secret set PINKCUFFS_PASSWORD
+
+# femuniverse (if you have a membership)
+gh secret set FEMUNIVERSE_USERNAME
+gh secret set FEMUNIVERSE_PASSWORD
 ```

 **Seed CI with your current progress before the first run:**
@@ -178,27 +178,72 @@ def find_clashes(urls: list[str]) -> dict[str, list[str]]:
    }


-def _clash_subfolder(url: str) -> str:
-    """Parent path segment used as disambiguator for clashing filenames."""
-    parts = urlparse(url).path.rstrip("/").split("/")
-    return unquote(parts[-2]) if len(parts) >= 2 else "unknown"
+def _path_folders(url: str) -> list[str]:
+    """Decoded URL path segments above the filename (filename excluded)."""
+    parts = [unquote(p) for p in urlparse(url).path.split("/") if p]
+    return parts[:-1]
+
+
+def _disambiguate_group(group: list[str]) -> dict[str, tuple[str, ...]]:
+    """Find the smallest depth of trailing folder segments that gives every URL in the group
+    a unique subfolder path. Returns {url: subfolder_segments}.
+
+    Comparison is case-insensitive so the result is safe on NTFS/APFS as well as ext4.
+    """
+    folders = {u: _path_folders(u) for u in group}
+    max_depth = max((len(f) for f in folders.values()), default=0)
+
+    for depth in range(1, max_depth + 1):
+        keys = {u: tuple(p.lower() for p in folders[u][-depth:]) for u in group}
+        if len(set(keys.values())) == len(group):
+            return {u: tuple(folders[u][-depth:]) for u in group}
+
+    raise RuntimeError(
+        f"Cannot disambiguate URL group sharing filename and full parent path: {group}"
+    )


 def build_download_paths(
    urls: list[str],
    output_dir: str | Path,
 ) -> dict[str, Path]:
-    """Map each URL to a local file path. Flat layout; clashing names get a subfolder."""
-    clashes = find_clashes(urls)
-    clash_lower = {name.lower() for name in clashes}
+    """Map each URL to a unique local file path.

-    paths = {}
+    Unique filenames go directly under output_dir. Filenames that clash
+    (case-insensitively) get the smallest tail of their URL path prepended
+    that makes every URL in the clashing group unique — e.g. /2018/Daisy/foo.mp4
+    and /2023/Daisy/foo.mp4 land at 2018/Daisy/foo.mp4 and 2023/Daisy/foo.mp4
+    rather than colliding at Daisy/foo.mp4.
+    """
+    by_lower: defaultdict[str, list[str]] = defaultdict(list)
    for url in urls:
-        filename = url_to_filename(url)
-        if filename.lower() in clash_lower:
-            paths[url] = Path(output_dir) / _clash_subfolder(url) / filename
-        else:
-            paths[url] = Path(output_dir) / filename
+        by_lower[url_to_filename(url).lower()].append(url)
+
+    base = Path(output_dir)
+    paths: dict[str, Path] = {}
+
+    for group in by_lower.values():
+        if len(group) == 1:
+            url = group[0]
+            paths[url] = base / url_to_filename(url)
+            continue
+        subfolders = _disambiguate_group(group)
+        for url in group:
+            paths[url] = base.joinpath(*subfolders[url]) / url_to_filename(url)
+
+    # Defensive: every URL must map to a distinct destination path.
+    # Case-fold the comparison since callers commonly run on NTFS/APFS where
+    # "Daisy/foo" and "daisy/foo" are the same file on disk.
+    seen: dict[str, str] = {}
+    for url, p in paths.items():
+        key = str(p).lower()
+        if key in seen:
+            raise RuntimeError(
+                f"Path collision after disambiguation: {url!r} and {seen[key]!r} "
+                f"both map to {p}"
+            )
+        seen[key] = url
+
    return paths


@@ -15,4 +15,9 @@ SITES: Final[dict[str, dict[str, str]]] = {
        "cookie_domain": "pinkcuffs.com",
        "env_prefix": "PINKCUFFS",
    },
+    "femuniverse": {
+        "base_url": "https://www.femuniverse.com",
+        "cookie_domain": "femuniverse.com",
+        "env_prefix": "FEMUNIVERSE",
+    },
 }
Author	SHA1	Message	Date
HugeFrog24	1f81ae7b93	Cache sizes	2026-05-10 18:59:07 +02:00
HugeFrog24	4f7101698a	site	2026-05-10 18:44:41 +02:00