Compare commits

...

2 Commits

Author SHA1 Message Date
HugeFrog24 1f81ae7b93 Cache sizes 2026-05-10 18:59:07 +02:00
HugeFrog24 4f7101698a site 2026-05-10 18:44:41 +02:00
8 changed files with 3073 additions and 45 deletions
-14
View File
@@ -1,14 +0,0 @@
{
"permissions": {
"allow": [
"Bash(jq -r \".\"\"/api/v1/video-channels/{channelHandle}/videos\"\".get.parameters[] | $ref\")",
"Bash(grep '$ref')",
"Bash(grep -c \"^ ''/\" /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.yaml)",
"Bash(grep -c ' \"\"/' /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.json)",
"Bash(grep -c \"^ ''/\" /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.yaml)",
"Bash(git --no-pager diff HEAD~1 docs/external/peertube/openapi.json)",
"Bash(git --no-pager show HEAD~1:docs/external/peertube/openapi.json)",
"Bash(git --no-pager diff --name-only HEAD~1)"
]
}
}
+5
View File
@@ -13,6 +13,11 @@ PINKCUFFS_USERNAME=your-email-or-username
PINKCUFFS_PASSWORD=your-password
PINKCUFFS_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
# femuniverse.com credentials (separate membership)
FEMUNIVERSE_USERNAME=your-email-or-username
FEMUNIVERSE_PASSWORD=your-password
FEMUNIVERSE_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
# PeerTube upload target
PEERTUBE_URL=https://your-peertube-instance.example
PEERTUBE_USER=admin
+2
View File
@@ -38,6 +38,8 @@ jobs:
JAILBIRDZ_PASSWORD: ${{ secrets.JAILBIRDZ_PASSWORD }}
PINKCUFFS_USERNAME: ${{ secrets.PINKCUFFS_USERNAME }}
PINKCUFFS_PASSWORD: ${{ secrets.PINKCUFFS_PASSWORD }}
FEMUNIVERSE_USERNAME: ${{ secrets.FEMUNIVERSE_USERNAME }}
FEMUNIVERSE_PASSWORD: ${{ secrets.FEMUNIVERSE_PASSWORD }}
- name: Commit updated video_map.json
if: always() # save progress even if main.py crashed or timed out
+3
View File
@@ -1,3 +1,6 @@
# Local Claude instructions
.claude/settings.local.json
# Temporary cache
__pycache__/
.ruff_cache/
+11 -4
View File
@@ -1,6 +1,6 @@
# 𝒥𝒶𝒾𝓁𝒷𝒾𝓇𝒹𝓏-𝒹𝓁
Jailbirdz.com and Pinkcuffs.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of one or both sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance.
Jailbirdz.com, Pinkcuffs.com, and Femuniverse.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of any combination of these sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance.
> [!NOTE]
> This tool does not bypass authentication, modify the site, or intercept anything it isn't entitled to. A valid, paid membership is required. The scraper authenticates using your own session cookie and accesses only content your account can already view in a browser.
@@ -23,9 +23,9 @@ cp .env.example .env
Set credentials for whichever sites you have a membership on. You don't need all of them.
**Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` equivalents) in `.env`. `main.py` logs in automatically on startup.
**Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` / `FEMUNIVERSE_*` equivalents) in `.env`. `main.py` logs in automatically on startup.
**Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie.
**Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE` / `FEMUNIVERSE_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie.
Sites with no credentials are skipped automatically when running `python main.py`.
@@ -35,6 +35,8 @@ Sites with no credentials are skipped automatically when running `python main.py
- `JAILBIRDZ_LOGIN_COOKIE` — jailbirdz.com session cookie (fallback).
- `PINKCUFFS_USERNAME` / `PINKCUFFS_PASSWORD` — pinkcuffs.com login.
- `PINKCUFFS_LOGIN_COOKIE` — pinkcuffs.com session cookie (fallback).
- `FEMUNIVERSE_USERNAME` / `FEMUNIVERSE_PASSWORD` — femuniverse.com login.
- `FEMUNIVERSE_LOGIN_COOKIE` — femuniverse.com session cookie (fallback).
- `PEERTUBE_URL` — base URL of your PeerTube instance.
- `PEERTUBE_USER` — PeerTube username.
- `PEERTUBE_CHANNEL` — channel to upload to.
@@ -50,6 +52,7 @@ Discovers all post URLs via the WordPress REST API, then visits each page with a
python main.py # scrape all sites you have credentials for
python main.py --site jailbirdz # scrape one site only
python main.py --site pinkcuffs --site jailbirdz # explicit multi-site
python main.py --site femuniverse # femuniverse only
```
Results are written to `video_map.json`. Safe to re-run — already-scraped posts are skipped.
@@ -66,7 +69,7 @@ Options:
--reorganize Rename existing files to match current naming mode
-w, --workers N Concurrent downloads (default: 4)
-n, --dry-run Print what would be downloaded
--site SITE Limit to one site (jailbirdz or pinkcuffs); repeatable
--site SITE Limit to one site (jailbirdz, pinkcuffs, or femuniverse); repeatable
```
Resumes partial downloads. The chosen naming mode is saved to `.naming_mode` inside the output directory and persists across runs. Filenames that would clash are placed into subfolders.
@@ -105,6 +108,10 @@ gh secret set JAILBIRDZ_PASSWORD
# pinkcuffs (if you have a membership)
gh secret set PINKCUFFS_USERNAME
gh secret set PINKCUFFS_PASSWORD
# femuniverse (if you have a membership)
gh secret set FEMUNIVERSE_USERNAME
gh secret set FEMUNIVERSE_PASSWORD
```
**Seed CI with your current progress before the first run:**
+58 -13
View File
@@ -178,27 +178,72 @@ def find_clashes(urls: list[str]) -> dict[str, list[str]]:
}
def _clash_subfolder(url: str) -> str:
"""Parent path segment used as disambiguator for clashing filenames."""
parts = urlparse(url).path.rstrip("/").split("/")
return unquote(parts[-2]) if len(parts) >= 2 else "unknown"
def _path_folders(url: str) -> list[str]:
"""Decoded URL path segments above the filename (filename excluded)."""
parts = [unquote(p) for p in urlparse(url).path.split("/") if p]
return parts[:-1]
def _disambiguate_group(group: list[str]) -> dict[str, tuple[str, ...]]:
    """Find the smallest depth of trailing folder segments that gives every URL
    in the group a unique subfolder path.

    Comparison is case-insensitive so the result is safe on NTFS/APFS as well
    as ext4; the returned segments keep their original casing.

    A URL listed more than once counts as a single entry: it is one download,
    not a clash with itself. A group with at most one distinct URL needs no
    subfolder at all and maps to an empty tuple.

    Args:
        group: URLs that share a filename.

    Returns:
        {url: subfolder_segments} for every distinct URL in the group.

    Raises:
        RuntimeError: if two distinct URLs still share their whole folder
            path (compared case-insensitively) at the maximum depth.
    """
    # dict.fromkeys drops repeats while keeping first-seen order.
    unique = list(dict.fromkeys(group))
    if len(unique) <= 1:
        return {u: () for u in unique}

    folders = {u: _path_folders(u) for u in unique}
    max_depth = max(len(f) for f in folders.values())
    for depth in range(1, max_depth + 1):
        # A slice deeper than a URL's folder list simply yields the whole list.
        tails = {u: tuple(folders[u][-depth:]) for u in unique}
        folded = {tuple(seg.lower() for seg in tail) for tail in tails.values()}
        if len(folded) == len(unique):
            return tails
    raise RuntimeError(
        f"Cannot disambiguate URL group sharing filename and full parent path: {group}"
    )
def build_download_paths(
    urls: list[str],
    output_dir: str | Path,
) -> dict[str, Path]:
    """Map each URL to a unique local file path.

    Unique filenames go directly under output_dir. Filenames that clash
    (case-insensitively) get the smallest tail of their URL path prepended
    that makes every URL in the clashing group unique — e.g. /2018/Daisy/foo.mp4
    and /2023/Daisy/foo.mp4 land at 2018/Daisy/foo.mp4 and 2023/Daisy/foo.mp4
    rather than colliding at Daisy/foo.mp4.

    A URL that appears more than once in urls is treated as a single entry,
    so a repeated URL is never mistaken for a filename clash.

    Raises:
        RuntimeError: if a clashing group cannot be disambiguated, or if two
            URLs still map to the same destination (compared
            case-insensitively) after disambiguation.
    """
    by_lower: defaultdict[str, list[str]] = defaultdict(list)
    # dict.fromkeys drops repeated URLs while keeping first-seen order.
    for url in dict.fromkeys(urls):
        by_lower[url_to_filename(url).lower()].append(url)

    base = Path(output_dir)
    paths: dict[str, Path] = {}
    for group in by_lower.values():
        if len(group) == 1:
            url = group[0]
            paths[url] = base / url_to_filename(url)
            continue
        subfolders = _disambiguate_group(group)
        for url in group:
            paths[url] = base.joinpath(*subfolders[url]) / url_to_filename(url)

    # Defensive: every URL must map to a distinct destination path.
    # Case-fold the comparison since callers commonly run on NTFS/APFS where
    # "Daisy/foo" and "daisy/foo" are the same file on disk.
    seen: dict[str, str] = {}
    for url, p in paths.items():
        key = str(p).lower()
        if key in seen:
            raise RuntimeError(
                f"Path collision after disambiguation: {url!r} and {seen[key]!r} "
                f"both map to {p}"
            )
        seen[key] = url
    return paths
+5
View File
@@ -15,4 +15,9 @@ SITES: Final[dict[str, dict[str, str]]] = {
"cookie_domain": "pinkcuffs.com",
"env_prefix": "PINKCUFFS",
},
"femuniverse": {
"base_url": "https://www.femuniverse.com",
"cookie_domain": "femuniverse.com",
"env_prefix": "FEMUNIVERSE",
},
}
+2989 -14
View File
File diff suppressed because one or more lines are too long