Compare commits

...

46 Commits

Author SHA1 Message Date
github-actions[bot] eef99fddef chore: nightly index update [skip ci] 2026-06-29 08:13:58 +00:00
github-actions[bot] d148ac838c chore: nightly index update [skip ci] 2026-06-27 06:31:19 +00:00
github-actions[bot] b6ae4b3e1a chore: nightly index update [skip ci] 2026-06-26 06:57:02 +00:00
github-actions[bot] 2db483370b chore: nightly index update [skip ci] 2026-06-25 06:49:57 +00:00
github-actions[bot] 308f908e91 chore: nightly index update [skip ci] 2026-06-24 06:50:34 +00:00
github-actions[bot] b5ec477aaf chore: nightly index update [skip ci] 2026-06-22 08:58:40 +00:00
github-actions[bot] d26612660f chore: nightly index update [skip ci] 2026-06-21 08:00:59 +00:00
github-actions[bot] c01e48c5ed chore: nightly index update [skip ci] 2026-06-20 07:01:56 +00:00
github-actions[bot] 3725fffef2 chore: nightly index update [skip ci] 2026-06-19 08:33:03 +00:00
HugeFrog24 f2ab0b53fa Description length fix 2026-06-18 16:01:55 +02:00
HugeFrog24 628cd1a1d9 Update OpenAPI spec 2026-06-18 15:43:40 +02:00
github-actions[bot] bf3f8c487c chore: nightly index update [skip ci] 2026-06-18 08:08:30 +00:00
github-actions[bot] 7b7f943c35 chore: nightly index update [skip ci] 2026-06-16 08:46:34 +00:00
github-actions[bot] 1e69e121a1 chore: nightly index update [skip ci] 2026-06-15 09:22:27 +00:00
github-actions[bot] a7458df9db chore: nightly index update [skip ci] 2026-06-14 07:23:48 +00:00
github-actions[bot] f44b17a177 chore: nightly index update [skip ci] 2026-06-13 06:59:20 +00:00
github-actions[bot] d340c5e431 chore: nightly index update [skip ci] 2026-06-12 07:23:41 +00:00
github-actions[bot] cb2404cb8c chore: nightly index update [skip ci] 2026-06-11 08:03:59 +00:00
github-actions[bot] 13441566ee chore: nightly index update [skip ci] 2026-06-10 07:09:59 +00:00
github-actions[bot] 9e14559dab chore: nightly index update [skip ci] 2026-06-09 06:51:37 +00:00
github-actions[bot] bf58e361dd chore: nightly index update [skip ci] 2026-06-08 08:11:22 +00:00
github-actions[bot] 662ae5cdfa chore: nightly index update [skip ci] 2026-06-06 06:31:41 +00:00
github-actions[bot] e9b19e2d04 chore: nightly index update [skip ci] 2026-06-05 07:09:10 +00:00
github-actions[bot] 2bc6987b5d chore: nightly index update [skip ci] 2026-06-04 07:25:42 +00:00
github-actions[bot] 17629fede1 chore: nightly index update [skip ci] 2026-06-03 08:18:00 +00:00
github-actions[bot] 604be16d67 chore: nightly index update [skip ci] 2026-05-31 06:59:20 +00:00
github-actions[bot] 347b38d38d chore: nightly index update [skip ci] 2026-05-30 06:25:00 +00:00
github-actions[bot] 5e2b53d7f0 chore: nightly index update [skip ci] 2026-05-28 06:55:58 +00:00
github-actions[bot] 32e1081f81 chore: nightly index update [skip ci] 2026-05-27 07:05:24 +00:00
github-actions[bot] c3ed8730e6 chore: nightly index update [skip ci] 2026-05-26 06:47:42 +00:00
github-actions[bot] 803eb97942 chore: nightly index update [skip ci] 2026-05-25 07:16:09 +00:00
github-actions[bot] 0b076b2687 chore: nightly index update [skip ci] 2026-05-24 06:38:55 +00:00
github-actions[bot] 926e5950f7 chore: nightly index update [skip ci] 2026-05-23 06:13:57 +00:00
github-actions[bot] b582102331 chore: nightly index update [skip ci] 2026-05-22 06:50:47 +00:00
github-actions[bot] 7bf1717529 chore: nightly index update [skip ci] 2026-05-21 06:54:47 +00:00
github-actions[bot] 543aabbd26 chore: nightly index update [skip ci] 2026-05-20 06:49:36 +00:00
github-actions[bot] 00ee6bc217 chore: nightly index update [skip ci] 2026-05-19 06:49:36 +00:00
github-actions[bot] b3fece0ee4 chore: nightly index update [skip ci] 2026-05-18 06:57:42 +00:00
github-actions[bot] 2bfc3aa17a chore: nightly index update [skip ci] 2026-05-17 06:24:01 +00:00
github-actions[bot] 331b539012 chore: nightly index update [skip ci] 2026-05-16 05:55:26 +00:00
github-actions[bot] 0b923da0e8 chore: nightly index update [skip ci] 2026-05-15 06:35:09 +00:00
github-actions[bot] c921cc98d9 chore: nightly index update [skip ci] 2026-05-14 06:26:49 +00:00
github-actions[bot] 9e6082f0f5 chore: nightly index update [skip ci] 2026-05-13 06:27:46 +00:00
github-actions[bot] 40c05c1ffd chore: nightly index update [skip ci] 2026-05-12 06:16:16 +00:00
HugeFrog24 1f81ae7b93 Cache siszes 2026-05-10 18:59:07 +02:00
HugeFrog24 4f7101698a site 2026-05-10 18:44:41 +02:00
10 changed files with 6737 additions and 982 deletions
-14
View File
@@ -1,14 +0,0 @@
{
"permissions": {
"allow": [
"Bash(jq -r \".\"\"/api/v1/video-channels/{channelHandle}/videos\"\".get.parameters[] | $ref\")",
"Bash(grep '$ref')",
"Bash(grep -c \"^ ''/\" /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.yaml)",
"Bash(grep -c ' \"\"/' /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.json)",
"Bash(grep -c \"^ ''/\" /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.yaml)",
"Bash(git --no-pager diff HEAD~1 docs/external/peertube/openapi.json)",
"Bash(git --no-pager show HEAD~1:docs/external/peertube/openapi.json)",
"Bash(git --no-pager diff --name-only HEAD~1)"
]
}
}
+5
View File
@@ -13,6 +13,11 @@ PINKCUFFS_USERNAME=your-email-or-username
PINKCUFFS_PASSWORD=your-password PINKCUFFS_PASSWORD=your-password
PINKCUFFS_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value> PINKCUFFS_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
# femuniverse.com credentials (separate membership)
FEMUNIVERSE_USERNAME=your-email-or-username
FEMUNIVERSE_PASSWORD=your-password
FEMUNIVERSE_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
# PeerTube upload target # PeerTube upload target
PEERTUBE_URL=https://your-peertube-instance.example PEERTUBE_URL=https://your-peertube-instance.example
PEERTUBE_USER=admin PEERTUBE_USER=admin
+2
View File
@@ -38,6 +38,8 @@ jobs:
JAILBIRDZ_PASSWORD: ${{ secrets.JAILBIRDZ_PASSWORD }} JAILBIRDZ_PASSWORD: ${{ secrets.JAILBIRDZ_PASSWORD }}
PINKCUFFS_USERNAME: ${{ secrets.PINKCUFFS_USERNAME }} PINKCUFFS_USERNAME: ${{ secrets.PINKCUFFS_USERNAME }}
PINKCUFFS_PASSWORD: ${{ secrets.PINKCUFFS_PASSWORD }} PINKCUFFS_PASSWORD: ${{ secrets.PINKCUFFS_PASSWORD }}
FEMUNIVERSE_USERNAME: ${{ secrets.FEMUNIVERSE_USERNAME }}
FEMUNIVERSE_PASSWORD: ${{ secrets.FEMUNIVERSE_PASSWORD }}
- name: Commit updated video_map.json - name: Commit updated video_map.json
if: always() # save progress even if main.py crashed or timed out if: always() # save progress even if main.py crashed or timed out
+3
View File
@@ -1,3 +1,6 @@
# Local Claude instructions
.claude/settings.local.json
# Temporary cache # Temporary cache
__pycache__/ __pycache__/
.ruff_cache/ .ruff_cache/
+11 -4
View File
@@ -1,6 +1,6 @@
# 𝒥𝒶𝒾𝓁𝒷𝒾𝓇𝒹𝓏-𝒹𝓁 # 𝒥𝒶𝒾𝓁𝒷𝒾𝓇𝒹𝓏-𝒹𝓁
Jailbirdz.com and Pinkcuffs.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of one or both sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance. Jailbirdz.com, Pinkcuffs.com, and Femuniverse.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of any combination of these sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance.
> [!NOTE] > [!NOTE]
> This tool does not bypass authentication, modify the site, or intercept anything it isn't entitled to. A valid, paid membership is required. The scraper authenticates using your own session cookie and accesses only content your account can already view in a browser. > This tool does not bypass authentication, modify the site, or intercept anything it isn't entitled to. A valid, paid membership is required. The scraper authenticates using your own session cookie and accesses only content your account can already view in a browser.
@@ -23,9 +23,9 @@ cp .env.example .env
Set credentials for whichever sites you have a membership on. You don't need both. Set credentials for whichever sites you have a membership on. You don't need all of them.
**Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` equivalents) in `.env`. `main.py` logs in automatically on startup. **Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` / `FEMUNIVERSE_*` equivalents) in `.env`. `main.py` logs in automatically on startup.
**Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie. **Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE` / `FEMUNIVERSE_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie.
Sites with no credentials are skipped automatically when running `python main.py`. Sites with no credentials are skipped automatically when running `python main.py`.
@@ -35,6 +35,8 @@ Sites with no credentials are skipped automatically when running `python main.py
- `JAILBIRDZ_LOGIN_COOKIE` — jailbirdz.com session cookie (fallback). - `JAILBIRDZ_LOGIN_COOKIE` — jailbirdz.com session cookie (fallback).
- `PINKCUFFS_USERNAME` / `PINKCUFFS_PASSWORD` — pinkcuffs.com login. - `PINKCUFFS_USERNAME` / `PINKCUFFS_PASSWORD` — pinkcuffs.com login.
- `PINKCUFFS_LOGIN_COOKIE` — pinkcuffs.com session cookie (fallback). - `PINKCUFFS_LOGIN_COOKIE` — pinkcuffs.com session cookie (fallback).
- `FEMUNIVERSE_USERNAME` / `FEMUNIVERSE_PASSWORD` — femuniverse.com login.
- `FEMUNIVERSE_LOGIN_COOKIE` — femuniverse.com session cookie (fallback).
- `PEERTUBE_URL` — base URL of your PeerTube instance. - `PEERTUBE_URL` — base URL of your PeerTube instance.
- `PEERTUBE_USER` — PeerTube username. - `PEERTUBE_USER` — PeerTube username.
- `PEERTUBE_CHANNEL` — channel to upload to. - `PEERTUBE_CHANNEL` — channel to upload to.
@@ -50,6 +52,7 @@ Discovers all post URLs via the WordPress REST API, then visits each page with a
python main.py # scrape all sites you have credentials for python main.py # scrape all sites you have credentials for
python main.py --site jailbirdz # scrape one site only python main.py --site jailbirdz # scrape one site only
python main.py --site pinkcuffs --site jailbirdz # explicit multi-site python main.py --site pinkcuffs --site jailbirdz # explicit multi-site
python main.py --site femuniverse # femuniverse only
``` ```
Results are written to `video_map.json`. Safe to re-run — already-scraped posts are skipped. Results are written to `video_map.json`. Safe to re-run — already-scraped posts are skipped.
@@ -66,7 +69,7 @@ Options:
--reorganize Rename existing files to match current naming mode --reorganize Rename existing files to match current naming mode
-w, --workers N Concurrent downloads (default: 4) -w, --workers N Concurrent downloads (default: 4)
-n, --dry-run Print what would be downloaded -n, --dry-run Print what would be downloaded
--site SITE Limit to one site (jailbirdz or pinkcuffs); repeatable --site SITE Limit to one site (jailbirdz, pinkcuffs, or femuniverse); repeatable
``` ```
Resumes partial downloads. The chosen naming mode is saved to `.naming_mode` inside the output directory and persists across runs. Filenames that would clash are placed into subfolders. Resumes partial downloads. The chosen naming mode is saved to `.naming_mode` inside the output directory and persists across runs. Filenames that would clash are placed into subfolders.
@@ -105,6 +108,10 @@ gh secret set JAILBIRDZ_PASSWORD
# pinkcuffs (if you have a membership) # pinkcuffs (if you have a membership)
gh secret set PINKCUFFS_USERNAME gh secret set PINKCUFFS_USERNAME
gh secret set PINKCUFFS_PASSWORD gh secret set PINKCUFFS_PASSWORD
# femuniverse (if you have a membership)
gh secret set FEMUNIVERSE_USERNAME
gh secret set FEMUNIVERSE_PASSWORD
``` ```
**Seed CI with your current progress before the first run:** **Seed CI with your current progress before the first run:**
+58 -13
View File
@@ -178,27 +178,72 @@ def find_clashes(urls: list[str]) -> dict[str, list[str]]:
} }
def _clash_subfolder(url: str) -> str: def _path_folders(url: str) -> list[str]:
"""Parent path segment used as disambiguator for clashing filenames.""" """Decoded URL path segments above the filename (filename excluded)."""
parts = urlparse(url).path.rstrip("/").split("/") parts = [unquote(p) for p in urlparse(url).path.split("/") if p]
return unquote(parts[-2]) if len(parts) >= 2 else "unknown" return parts[:-1]
def _disambiguate_group(group: list[str]) -> dict[str, tuple[str, ...]]:
"""Find the smallest depth of trailing folder segments that gives every URL in the group
a unique subfolder path. Returns {url: subfolder_segments}.
Comparison is case-insensitive so the result is safe on NTFS/APFS as well as ext4.
"""
folders = {u: _path_folders(u) for u in group}
max_depth = max((len(f) for f in folders.values()), default=0)
for depth in range(1, max_depth + 1):
keys = {u: tuple(p.lower() for p in folders[u][-depth:]) for u in group}
if len(set(keys.values())) == len(group):
return {u: tuple(folders[u][-depth:]) for u in group}
raise RuntimeError(
f"Cannot disambiguate URL group sharing filename and full parent path: {group}"
)
def build_download_paths( def build_download_paths(
urls: list[str], urls: list[str],
output_dir: str | Path, output_dir: str | Path,
) -> dict[str, Path]: ) -> dict[str, Path]:
"""Map each URL to a local file path. Flat layout; clashing names get a subfolder.""" """Map each URL to a unique local file path.
clashes = find_clashes(urls)
clash_lower = {name.lower() for name in clashes}
paths = {} Unique filenames go directly under output_dir. Filenames that clash
(case-insensitively) get the smallest tail of their URL path prepended
that makes every URL in the clashing group unique — e.g. /2018/Daisy/foo.mp4
and /2023/Daisy/foo.mp4 land at 2018/Daisy/foo.mp4 and 2023/Daisy/foo.mp4
rather than colliding at Daisy/foo.mp4.
"""
by_lower: defaultdict[str, list[str]] = defaultdict(list)
for url in urls: for url in urls:
filename = url_to_filename(url) by_lower[url_to_filename(url).lower()].append(url)
if filename.lower() in clash_lower:
paths[url] = Path(output_dir) / _clash_subfolder(url) / filename base = Path(output_dir)
else: paths: dict[str, Path] = {}
paths[url] = Path(output_dir) / filename
for group in by_lower.values():
if len(group) == 1:
url = group[0]
paths[url] = base / url_to_filename(url)
continue
subfolders = _disambiguate_group(group)
for url in group:
paths[url] = base.joinpath(*subfolders[url]) / url_to_filename(url)
# Defensive: every URL must map to a distinct destination path.
# Case-fold the comparison since callers commonly run on NTFS/APFS where
# "Daisy/foo" and "daisy/foo" are the same file on disk.
seen: dict[str, str] = {}
for url, p in paths.items():
key = str(p).lower()
if key in seen:
raise RuntimeError(
f"Path collision after disambiguation: {url!r} and {seen[key]!r} "
f"both map to {p}"
)
seen[key] = url
return paths return paths
+5
View File
@@ -15,4 +15,9 @@ SITES: Final[dict[str, dict[str, str]]] = {
"cookie_domain": "pinkcuffs.com", "cookie_domain": "pinkcuffs.com",
"env_prefix": "PINKCUFFS", "env_prefix": "PINKCUFFS",
}, },
"femuniverse": {
"base_url": "https://www.femuniverse.com",
"cookie_domain": "femuniverse.com",
"env_prefix": "FEMUNIVERSE",
},
} }
+3395 -949
View File
File diff suppressed because one or more lines are too long
+6
View File
@@ -49,6 +49,7 @@ DEFAULT_BATCH_SIZE = 1
DEFAULT_POLL = 30 DEFAULT_POLL = 30
UPLOADED_FILE = ".uploaded" UPLOADED_FILE = ".uploaded"
PT_NAME_MAX = 120 PT_NAME_MAX = 120
PT_DESC_MIN = 3 # PeerTube rejects descriptions shorter than this
# ── Text helpers ───────────────────────────────────────────────────── # ── Text helpers ─────────────────────────────────────────────────────
@@ -62,6 +63,11 @@ def clean_description(raw: str) -> str:
text = re.sub(r"<[^>]+>", "", text) text = re.sub(r"<[^>]+>", "", text)
text = html.unescape(text) text = html.unescape(text)
text = re.sub(r"\n{3,}", "\n\n", text).strip() text = re.sub(r"\n{3,}", "\n\n", text).strip()
# PeerTube enforces a 3-char minimum on descriptions; a sub-minimum
# description (e.g. a stray ".") makes the upload-init 400. Drop it so
# it's omitted from the request rather than rejected.
if len(text) < PT_DESC_MIN:
return ""
return text[:10000] return text[:10000]
+3263 -13
View File
File diff suppressed because one or more lines are too long