mirror of
https://github.com/HugeFrog24/jailbirdz-dl.git
synced 2026-06-30 00:27:13 +00:00
Compare commits
46 Commits
91b536b375
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| eef99fddef | |||
| d148ac838c | |||
| b6ae4b3e1a | |||
| 2db483370b | |||
| 308f908e91 | |||
| b5ec477aaf | |||
| d26612660f | |||
| c01e48c5ed | |||
| 3725fffef2 | |||
| f2ab0b53fa | |||
| 628cd1a1d9 | |||
| bf3f8c487c | |||
| 7b7f943c35 | |||
| 1e69e121a1 | |||
| a7458df9db | |||
| f44b17a177 | |||
| d340c5e431 | |||
| cb2404cb8c | |||
| 13441566ee | |||
| 9e14559dab | |||
| bf58e361dd | |||
| 662ae5cdfa | |||
| e9b19e2d04 | |||
| 2bc6987b5d | |||
| 17629fede1 | |||
| 604be16d67 | |||
| 347b38d38d | |||
| 5e2b53d7f0 | |||
| 32e1081f81 | |||
| c3ed8730e6 | |||
| 803eb97942 | |||
| 0b076b2687 | |||
| 926e5950f7 | |||
| b582102331 | |||
| 7bf1717529 | |||
| 543aabbd26 | |||
| 00ee6bc217 | |||
| b3fece0ee4 | |||
| 2bfc3aa17a | |||
| 331b539012 | |||
| 0b923da0e8 | |||
| c921cc98d9 | |||
| 9e6082f0f5 | |||
| 40c05c1ffd | |||
| 1f81ae7b93 | |||
| 4f7101698a |
@@ -1,14 +0,0 @@
|
|||||||
{
|
|
||||||
"permissions": {
|
|
||||||
"allow": [
|
|
||||||
"Bash(jq -r \".\"\"/api/v1/video-channels/{channelHandle}/videos\"\".get.parameters[] | $ref\")",
|
|
||||||
"Bash(grep '$ref')",
|
|
||||||
"Bash(grep -c \"^ ''/\" /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.yaml)",
|
|
||||||
"Bash(grep -c ' \"\"/' /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.json)",
|
|
||||||
"Bash(grep -c \"^ ''/\" /c/Users/admin/Desktop/jailbirdz-dl/docs/external/peertube/openapi.yaml)",
|
|
||||||
"Bash(git --no-pager diff HEAD~1 docs/external/peertube/openapi.json)",
|
|
||||||
"Bash(git --no-pager show HEAD~1:docs/external/peertube/openapi.json)",
|
|
||||||
"Bash(git --no-pager diff --name-only HEAD~1)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -13,6 +13,11 @@ PINKCUFFS_USERNAME=your-email-or-username
|
|||||||
PINKCUFFS_PASSWORD=your-password
|
PINKCUFFS_PASSWORD=your-password
|
||||||
PINKCUFFS_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
|
PINKCUFFS_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
|
||||||
|
|
||||||
|
# femuniverse.com credentials (separate membership)
|
||||||
|
FEMUNIVERSE_USERNAME=your-email-or-username
|
||||||
|
FEMUNIVERSE_PASSWORD=your-password
|
||||||
|
FEMUNIVERSE_LOGIN_COOKIE=wordpress_logged_in_<hash>=<value>
|
||||||
|
|
||||||
# PeerTube upload target
|
# PeerTube upload target
|
||||||
PEERTUBE_URL=https://your-peertube-instance.example
|
PEERTUBE_URL=https://your-peertube-instance.example
|
||||||
PEERTUBE_USER=admin
|
PEERTUBE_USER=admin
|
||||||
|
|||||||
@@ -38,6 +38,8 @@ jobs:
|
|||||||
JAILBIRDZ_PASSWORD: ${{ secrets.JAILBIRDZ_PASSWORD }}
|
JAILBIRDZ_PASSWORD: ${{ secrets.JAILBIRDZ_PASSWORD }}
|
||||||
PINKCUFFS_USERNAME: ${{ secrets.PINKCUFFS_USERNAME }}
|
PINKCUFFS_USERNAME: ${{ secrets.PINKCUFFS_USERNAME }}
|
||||||
PINKCUFFS_PASSWORD: ${{ secrets.PINKCUFFS_PASSWORD }}
|
PINKCUFFS_PASSWORD: ${{ secrets.PINKCUFFS_PASSWORD }}
|
||||||
|
FEMUNIVERSE_USERNAME: ${{ secrets.FEMUNIVERSE_USERNAME }}
|
||||||
|
FEMUNIVERSE_PASSWORD: ${{ secrets.FEMUNIVERSE_PASSWORD }}
|
||||||
|
|
||||||
- name: Commit updated video_map.json
|
- name: Commit updated video_map.json
|
||||||
if: always() # save progress even if main.py crashed or timed out
|
if: always() # save progress even if main.py crashed or timed out
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Local Claude instructions
|
||||||
|
.claude/settings.local.json
|
||||||
|
|
||||||
# Temporary cache
|
# Temporary cache
|
||||||
__pycache__/
|
__pycache__/
|
||||||
.ruff_cache/
|
.ruff_cache/
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# 𝒥𝒶𝒾𝓁𝒷𝒾𝓇𝒹𝓏-𝒹𝓁
|
# 𝒥𝒶𝒾𝓁𝒷𝒾𝓇𝒹𝓏-𝒹𝓁
|
||||||
|
|
||||||
Jailbirdz.com and Pinkcuffs.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of one or both sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance.
|
Jailbirdz.com, Pinkcuffs.com, and Femuniverse.com are Arizona-based subscription video sites publishing arrest and jail roleplay scenarios featuring women. This tool scrapes the member area of any combination of these sites, downloads the videos, and re-hosts them on a self-owned PeerTube instance.
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> This tool does not bypass authentication, modify the site, or intercept anything it isn't entitled to. A valid, paid membership is required. The scraper authenticates using your own session cookie and accesses only content your account can already view in a browser.
|
> This tool does not bypass authentication, modify the site, or intercept anything it isn't entitled to. A valid, paid membership is required. The scraper authenticates using your own session cookie and accesses only content your account can already view in a browser.
|
||||||
@@ -23,9 +23,9 @@ cp .env.example .env
|
|||||||
|
|
||||||
Set credentials for whichever sites you have a membership on. You don't need both.
|
Set credentials for whichever sites you have a membership on. You don't need all of them.
|
||||||
|
|
||||||
**Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` equivalents) in `.env`. `main.py` logs in automatically on startup.
|
**Option A — credentials (recommended):** set `JAILBIRDZ_USERNAME` + `JAILBIRDZ_PASSWORD` (and/or the `PINKCUFFS_*` / `FEMUNIVERSE_*` equivalents) in `.env`. `main.py` logs in automatically on startup.
|
||||||
|
|
||||||
**Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie.
|
**Option B — manual cookie:** set `JAILBIRDZ_LOGIN_COOKIE` (and/or `PINKCUFFS_LOGIN_COOKIE` / `FEMUNIVERSE_LOGIN_COOKIE`) yourself. Get the value from browser DevTools → Storage → Cookies — copy the full `name=value` of the `wordpress_logged_in_*` cookie.
|
||||||
|
|
||||||
Sites with no credentials are skipped automatically when running `python main.py`.
|
Sites with no credentials are skipped automatically when running `python main.py`.
|
||||||
|
|
||||||
@@ -35,6 +35,8 @@ Sites with no credentials are skipped automatically when running `python main.py
|
|||||||
- `JAILBIRDZ_LOGIN_COOKIE` — jailbirdz.com session cookie (fallback).
|
- `JAILBIRDZ_LOGIN_COOKIE` — jailbirdz.com session cookie (fallback).
|
||||||
- `PINKCUFFS_USERNAME` / `PINKCUFFS_PASSWORD` — pinkcuffs.com login.
|
- `PINKCUFFS_USERNAME` / `PINKCUFFS_PASSWORD` — pinkcuffs.com login.
|
||||||
- `PINKCUFFS_LOGIN_COOKIE` — pinkcuffs.com session cookie (fallback).
|
- `PINKCUFFS_LOGIN_COOKIE` — pinkcuffs.com session cookie (fallback).
|
||||||
|
- `FEMUNIVERSE_USERNAME` / `FEMUNIVERSE_PASSWORD` — femuniverse.com login.
|
||||||
|
- `FEMUNIVERSE_LOGIN_COOKIE` — femuniverse.com session cookie (fallback).
|
||||||
- `PEERTUBE_URL` — base URL of your PeerTube instance.
|
- `PEERTUBE_URL` — base URL of your PeerTube instance.
|
||||||
- `PEERTUBE_USER` — PeerTube username.
|
- `PEERTUBE_USER` — PeerTube username.
|
||||||
- `PEERTUBE_CHANNEL` — channel to upload to.
|
- `PEERTUBE_CHANNEL` — channel to upload to.
|
||||||
@@ -50,6 +52,7 @@ Discovers all post URLs via the WordPress REST API, then visits each page with a
|
|||||||
python main.py # scrape all sites you have credentials for
|
python main.py # scrape all sites you have credentials for
|
||||||
python main.py --site jailbirdz # scrape one site only
|
python main.py --site jailbirdz # scrape one site only
|
||||||
python main.py --site pinkcuffs --site jailbirdz # explicit multi-site
|
python main.py --site pinkcuffs --site jailbirdz # explicit multi-site
|
||||||
|
python main.py --site femuniverse # femuniverse only
|
||||||
```
|
```
|
||||||
|
|
||||||
Results are written to `video_map.json`. Safe to re-run — already-scraped posts are skipped.
|
Results are written to `video_map.json`. Safe to re-run — already-scraped posts are skipped.
|
||||||
@@ -66,7 +69,7 @@ Options:
|
|||||||
--reorganize Rename existing files to match current naming mode
|
--reorganize Rename existing files to match current naming mode
|
||||||
-w, --workers N Concurrent downloads (default: 4)
|
-w, --workers N Concurrent downloads (default: 4)
|
||||||
-n, --dry-run Print what would be downloaded
|
-n, --dry-run Print what would be downloaded
|
||||||
--site SITE Limit to one site (jailbirdz or pinkcuffs); repeatable
|
--site SITE Limit to one site (jailbirdz, pinkcuffs, or femuniverse); repeatable
|
||||||
```
|
```
|
||||||
|
|
||||||
Resumes partial downloads. The chosen naming mode is saved to `.naming_mode` inside the output directory and persists across runs. Filenames that would clash are placed into subfolders.
|
Resumes partial downloads. The chosen naming mode is saved to `.naming_mode` inside the output directory and persists across runs. Filenames that would clash are placed into subfolders.
|
||||||
@@ -105,6 +108,10 @@ gh secret set JAILBIRDZ_PASSWORD
|
|||||||
# pinkcuffs (if you have a membership)
|
# pinkcuffs (if you have a membership)
|
||||||
gh secret set PINKCUFFS_USERNAME
|
gh secret set PINKCUFFS_USERNAME
|
||||||
gh secret set PINKCUFFS_PASSWORD
|
gh secret set PINKCUFFS_PASSWORD
|
||||||
|
|
||||||
|
# femuniverse (if you have a membership)
|
||||||
|
gh secret set FEMUNIVERSE_USERNAME
|
||||||
|
gh secret set FEMUNIVERSE_PASSWORD
|
||||||
```
|
```
|
||||||
|
|
||||||
**Seed CI with your current progress before the first run:**
|
**Seed CI with your current progress before the first run:**
|
||||||
|
|||||||
+58
-13
@@ -178,27 +178,72 @@ def find_clashes(urls: list[str]) -> dict[str, list[str]]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _clash_subfolder(url: str) -> str:
|
def _path_folders(url: str) -> list[str]:
|
||||||
"""Parent path segment used as disambiguator for clashing filenames."""
|
"""Decoded URL path segments above the filename (filename excluded)."""
|
||||||
parts = urlparse(url).path.rstrip("/").split("/")
|
parts = [unquote(p) for p in urlparse(url).path.split("/") if p]
|
||||||
return unquote(parts[-2]) if len(parts) >= 2 else "unknown"
|
return parts[:-1]
|
||||||
|
|
||||||
|
|
||||||
|
def _disambiguate_group(group: list[str]) -> dict[str, tuple[str, ...]]:
|
||||||
|
"""Find the smallest depth of trailing folder segments that gives every URL in the group
|
||||||
|
a unique subfolder path. Returns {url: subfolder_segments}.
|
||||||
|
|
||||||
|
Comparison is case-insensitive so the result is safe on NTFS/APFS as well as ext4.
|
||||||
|
"""
|
||||||
|
folders = {u: _path_folders(u) for u in group}
|
||||||
|
max_depth = max((len(f) for f in folders.values()), default=0)
|
||||||
|
|
||||||
|
for depth in range(1, max_depth + 1):
|
||||||
|
keys = {u: tuple(p.lower() for p in folders[u][-depth:]) for u in group}
|
||||||
|
if len(set(keys.values())) == len(group):
|
||||||
|
return {u: tuple(folders[u][-depth:]) for u in group}
|
||||||
|
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Cannot disambiguate URL group sharing filename and full parent path: {group}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_download_paths(
|
def build_download_paths(
|
||||||
urls: list[str],
|
urls: list[str],
|
||||||
output_dir: str | Path,
|
output_dir: str | Path,
|
||||||
) -> dict[str, Path]:
|
) -> dict[str, Path]:
|
||||||
"""Map each URL to a local file path. Flat layout; clashing names get a subfolder."""
|
"""Map each URL to a unique local file path.
|
||||||
clashes = find_clashes(urls)
|
|
||||||
clash_lower = {name.lower() for name in clashes}
|
|
||||||
|
|
||||||
paths = {}
|
Unique filenames go directly under output_dir. Filenames that clash
|
||||||
|
(case-insensitively) get the smallest tail of their URL path prepended
|
||||||
|
that makes every URL in the clashing group unique — e.g. /2018/Daisy/foo.mp4
|
||||||
|
and /2023/Daisy/foo.mp4 land at 2018/Daisy/foo.mp4 and 2023/Daisy/foo.mp4
|
||||||
|
rather than colliding at Daisy/foo.mp4.
|
||||||
|
"""
|
||||||
|
by_lower: defaultdict[str, list[str]] = defaultdict(list)
|
||||||
for url in urls:
|
for url in urls:
|
||||||
filename = url_to_filename(url)
|
by_lower[url_to_filename(url).lower()].append(url)
|
||||||
if filename.lower() in clash_lower:
|
|
||||||
paths[url] = Path(output_dir) / _clash_subfolder(url) / filename
|
base = Path(output_dir)
|
||||||
else:
|
paths: dict[str, Path] = {}
|
||||||
paths[url] = Path(output_dir) / filename
|
|
||||||
|
for group in by_lower.values():
|
||||||
|
if len(group) == 1:
|
||||||
|
url = group[0]
|
||||||
|
paths[url] = base / url_to_filename(url)
|
||||||
|
continue
|
||||||
|
subfolders = _disambiguate_group(group)
|
||||||
|
for url in group:
|
||||||
|
paths[url] = base.joinpath(*subfolders[url]) / url_to_filename(url)
|
||||||
|
|
||||||
|
# Defensive: every URL must map to a distinct destination path.
|
||||||
|
# Case-fold the comparison since callers commonly run on NTFS/APFS where
|
||||||
|
# "Daisy/foo" and "daisy/foo" are the same file on disk.
|
||||||
|
seen: dict[str, str] = {}
|
||||||
|
for url, p in paths.items():
|
||||||
|
key = str(p).lower()
|
||||||
|
if key in seen:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Path collision after disambiguation: {url!r} and {seen[key]!r} "
|
||||||
|
f"both map to {p}"
|
||||||
|
)
|
||||||
|
seen[key] = url
|
||||||
|
|
||||||
return paths
|
return paths
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -15,4 +15,9 @@ SITES: Final[dict[str, dict[str, str]]] = {
|
|||||||
"cookie_domain": "pinkcuffs.com",
|
"cookie_domain": "pinkcuffs.com",
|
||||||
"env_prefix": "PINKCUFFS",
|
"env_prefix": "PINKCUFFS",
|
||||||
},
|
},
|
||||||
|
"femuniverse": {
|
||||||
|
"base_url": "https://www.femuniverse.com",
|
||||||
|
"cookie_domain": "femuniverse.com",
|
||||||
|
"env_prefix": "FEMUNIVERSE",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
Vendored
+3395
-949
File diff suppressed because one or more lines are too long
@@ -49,6 +49,7 @@ DEFAULT_BATCH_SIZE = 1
|
|||||||
DEFAULT_POLL = 30
|
DEFAULT_POLL = 30
|
||||||
UPLOADED_FILE = ".uploaded"
|
UPLOADED_FILE = ".uploaded"
|
||||||
PT_NAME_MAX = 120
|
PT_NAME_MAX = 120
|
||||||
|
PT_DESC_MIN = 3 # PeerTube rejects descriptions shorter than this
|
||||||
|
|
||||||
|
|
||||||
# ── Text helpers ─────────────────────────────────────────────────────
|
# ── Text helpers ─────────────────────────────────────────────────────
|
||||||
@@ -62,6 +63,11 @@ def clean_description(raw: str) -> str:
|
|||||||
text = re.sub(r"<[^>]+>", "", text)
|
text = re.sub(r"<[^>]+>", "", text)
|
||||||
text = html.unescape(text)
|
text = html.unescape(text)
|
||||||
text = re.sub(r"\n{3,}", "\n\n", text).strip()
|
text = re.sub(r"\n{3,}", "\n\n", text).strip()
|
||||||
|
# PeerTube enforces a 3-char minimum on descriptions; a sub-minimum
|
||||||
|
# description (e.g. a stray ".") makes the upload-init 400. Drop it so
|
||||||
|
# it's omitted from the request rather than rejected.
|
||||||
|
if len(text) < PT_DESC_MIN:
|
||||||
|
return ""
|
||||||
return text[:10000]
|
return text[:10000]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+3263
-13
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user