mirror of
https://github.com/HugeFrog24/jailbirdz-dl.git
synced 2026-03-02 09:04:33 +00:00
Cookie validation logic
This commit is contained in:
@@ -9,38 +9,58 @@ Importable functions:
|
||||
fetch_sizes(urls, workers, on_progress) - bulk size lookup
|
||||
make_session() - requests.Session with required headers
|
||||
load_video_map() - load video_map.json, returns {} on missing/corrupt
|
||||
is_valid_url(url) - True if url is a plain http(s) URL with no HTML artefacts
|
||||
expects_video(url) - True if url is a members-only video page
|
||||
"""
|
||||
|
||||
from collections import defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path, PurePosixPath
|
||||
from typing import Any, Callable, Optional, cast
|
||||
from urllib.parse import urlparse, unquote
|
||||
import json
|
||||
import requests
|
||||
from config import BASE_URL
|
||||
|
||||
# Referer header the site requires on media requests.
REFERER: str = f"{BASE_URL}/"
# On-disk JSON cache mapping page URLs to discovered video URLs.
VIDEO_MAP_FILE: str = "video_map.json"
# File extensions treated as downloadable videos.
VIDEO_EXTS: set[str] = {".mp4", ".mov", ".m4v", ".webm", ".avi"}
|
||||
|
||||
|
||||
def load_video_map():
|
||||
def is_valid_url(url: str) -> bool:
    """True if url is a plain http(s) URL with no HTML artefacts (<, >, href= etc.)."""
    if not url.startswith("http"):
        return False
    # Reject strings that still carry scraped-markup residue.
    return not any(artefact in url for artefact in ("<", ">", " href="))
|
||||
|
||||
|
||||
def expects_video(url: str) -> bool:
    """True if url is a members-only video page that should contain a video."""
    members_path = "/pinkcuffs-videos/"
    return members_path in url
|
||||
|
||||
|
||||
def load_video_map() -> dict[str, Any]:
    """Load VIDEO_MAP_FILE and return its contents.

    Returns:
        The parsed JSON object, or {} when the file is missing, unreadable,
        corrupt, or its top-level value is not a JSON object.
    """
    # EAFP: opening directly avoids the exists()/open() race of the
    # previous Path(...).exists() check; FileNotFoundError is an OSError.
    try:
        with open(VIDEO_MAP_FILE, encoding="utf-8") as f:
            data_any: Any = json.load(f)
    except (json.JSONDecodeError, OSError):
        return {}
    # A valid map is a JSON object; any other top-level value is corrupt.
    if not isinstance(data_any, dict):
        return {}
    return cast(dict[str, Any], data_any)
|
||||
|
||||
|
||||
def make_session() -> requests.Session:
    """Build a requests.Session preconfigured with the Referer the site requires."""
    session = requests.Session()
    session.headers["Referer"] = REFERER
    return session
|
||||
|
||||
|
||||
def fmt_size(b):
|
||||
def fmt_size(b: float | int) -> str:
|
||||
for unit in ("B", "KB", "MB", "GB"):
|
||||
if b < 1024:
|
||||
return f"{b:.1f} {unit}"
|
||||
@@ -48,30 +68,34 @@ def fmt_size(b):
|
||||
return f"{b:.1f} TB"
|
||||
|
||||
|
||||
def url_to_filename(url: str) -> str:
    """Return the percent-decoded basename of the URL's path component."""
    path = urlparse(url).path
    basename = PurePosixPath(path).name
    return unquote(basename)
|
||||
|
||||
|
||||
def find_clashes(urls: list[str]) -> dict[str, list[str]]:
    """Map a representative filename to the URLs whose names collide.

    Grouping is case-insensitive so that e.g. "DaisyArrest.mp4" and
    "daisyarrest.mp4" count as a clash.  This is required for correctness on
    case-insensitive filesystems (NTFS, exFAT, macOS HFS+) and harmless on
    case-sensitive ones (ext4) — filenames on disk keep their original
    casing; only clash *detection* is case-folded.
    """
    groups: defaultdict[str, list[str]] = defaultdict(list)
    for u in urls:
        groups[url_to_filename(u).lower()].append(u)
    return {
        url_to_filename(members[0]): members
        for members in groups.values()
        if len(members) > 1
    }
|
||||
|
||||
|
||||
def _clash_subfolder(url):
|
||||
def _clash_subfolder(url: str) -> str:
|
||||
"""Parent path segment used as disambiguator for clashing filenames."""
|
||||
parts = urlparse(url).path.rstrip("/").split("/")
|
||||
return unquote(parts[-2]) if len(parts) >= 2 else "unknown"
|
||||
|
||||
|
||||
def build_download_paths(urls, output_dir):
|
||||
def build_download_paths(
|
||||
urls: list[str],
|
||||
output_dir: str | Path,
|
||||
) -> dict[str, Path]:
|
||||
"""Map each URL to a local file path. Flat layout; clashing names get a subfolder."""
|
||||
clashes = find_clashes(urls)
|
||||
clash_lower = {name.lower() for name in clashes}
|
||||
@@ -86,7 +110,10 @@ def build_download_paths(urls, output_dir):
|
||||
return paths
|
||||
|
||||
|
||||
def get_remote_size(session, url):
|
||||
def get_remote_size(
|
||||
session: requests.Session,
|
||||
url: str,
|
||||
) -> Optional[int]:
|
||||
try:
|
||||
r = session.head(url, allow_redirects=True, timeout=15)
|
||||
if r.status_code < 400 and "Content-Length" in r.headers:
|
||||
@@ -94,8 +121,7 @@ def get_remote_size(session, url):
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
r = session.get(
|
||||
url, headers={"Range": "bytes=0-0"}, stream=True, timeout=15)
|
||||
r = session.get(url, headers={"Range": "bytes=0-0"}, stream=True, timeout=15)
|
||||
r.close()
|
||||
cr = r.headers.get("Content-Range", "")
|
||||
if "/" in cr:
|
||||
@@ -105,10 +131,14 @@ def get_remote_size(session, url):
|
||||
return None
|
||||
|
||||
|
||||
def fetch_sizes(urls, workers=20, on_progress=None):
|
||||
def fetch_sizes(
|
||||
urls: list[str],
|
||||
workers: int = 20,
|
||||
on_progress: Optional[Callable[[int, int], None]] = None,
|
||||
) -> dict[str, Optional[int]]:
|
||||
"""Return {url: size_or_None}. on_progress(done, total) called after each URL."""
|
||||
session = make_session()
|
||||
sizes = {}
|
||||
sizes: dict[str, Optional[int]] = {}
|
||||
total = len(urls)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=workers) as pool:
|
||||
@@ -117,7 +147,7 @@ def fetch_sizes(urls, workers=20, on_progress=None):
|
||||
for fut in as_completed(futures):
|
||||
sizes[futures[fut]] = fut.result()
|
||||
done += 1
|
||||
if on_progress:
|
||||
if on_progress is not None:
|
||||
on_progress(done, total)
|
||||
|
||||
return sizes
|
||||
@@ -125,14 +155,20 @@ def fetch_sizes(urls, workers=20, on_progress=None):
|
||||
|
||||
# --------------- CLI ---------------
|
||||
|
||||
def main():
|
||||
|
||||
def main() -> None:
|
||||
vm = load_video_map()
|
||||
urls = [u for entry in vm.values() for u in entry.get("videos", []) if u.startswith("http")]
|
||||
urls = [
|
||||
u
|
||||
for entry in vm.values()
|
||||
for u in entry.get("videos", [])
|
||||
if u.startswith("http")
|
||||
]
|
||||
|
||||
clashes = find_clashes(urls)
|
||||
|
||||
print(f"Total URLs: {len(urls)}")
|
||||
by_name = defaultdict(list)
|
||||
by_name: defaultdict[str, list[str]] = defaultdict(list)
|
||||
for url in urls:
|
||||
by_name[url_to_filename(url)].append(url)
|
||||
print(f"Unique filenames: {len(by_name)}")
|
||||
|
||||
Reference in New Issue
Block a user