mirror of
https://github.com/HugeFrog24/jailbirdz-dl.git
synced 2026-03-02 01:04:31 +00:00
OK
This commit is contained in:
@@ -128,10 +128,11 @@ Lists filenames that map to more than one source URL, with sizes.
|
||||
### Estimate total download size
|
||||
|
||||
```bash
|
||||
python total_size.py
|
||||
python total_size.py # read cached sizes and print summary
|
||||
python total_size.py --write # probe uncached/stale URLs and refresh the cache
|
||||
```
|
||||
|
||||
Fetches `Content-Length` for every video URL in `video_map.json` and prints a size summary. Does not download anything.
|
||||
Reads cached file sizes from `video_map.json` and prints a summary (total, smallest, largest, average). The default mode never hits the network. Use `--write` to probe any missing or stale entries and persist the results.
|
||||
|
||||
## Data files
|
||||
|
||||
|
||||
39
download.py
39
download.py
@@ -18,6 +18,7 @@ from collections import defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import Any
|
||||
import requests
|
||||
import time
|
||||
|
||||
from check_clashes import (
|
||||
make_session,
|
||||
@@ -28,6 +29,7 @@ from check_clashes import (
|
||||
build_url_referers,
|
||||
fetch_sizes,
|
||||
load_video_map,
|
||||
save_video_map,
|
||||
is_valid_url,
|
||||
VIDEO_MAP_FILE,
|
||||
)
|
||||
@@ -288,6 +290,25 @@ def build_url_title_map(video_map: dict[str, Any]) -> dict[str, str]:
|
||||
return url_title
|
||||
|
||||
|
||||
def _persist_fetched_sizes(newly_fetched: dict[str, int | None]) -> None:
|
||||
"""Write newly probed sizes back to video_map.json (successful probes only)."""
|
||||
now = int(time.time())
|
||||
for site_key in SITES:
|
||||
vm_site = load_video_map(site_key)
|
||||
changed = False
|
||||
for entry in vm_site.values():
|
||||
for vid in entry.get("videos", []):
|
||||
if vid["url"] in newly_fetched and vid.get("size") is None and newly_fetched[vid["url"]] is not None:
|
||||
vid["size"] = newly_fetched[vid["url"]]
|
||||
vid["size_checked_at"] = now
|
||||
changed = True
|
||||
if changed:
|
||||
save_video_map(vm_site, site_key)
|
||||
n_saved = sum(1 for s in newly_fetched.values() if s is not None)
|
||||
if n_saved:
|
||||
print(f"[+] Cached {n_saved} newly probed size(s).")
|
||||
|
||||
|
||||
def build_url_to_site() -> dict[str, str]:
|
||||
"""Return {cdn_video_url: site_key} by loading each site's map in turn."""
|
||||
result: dict[str, str] = {}
|
||||
@@ -423,16 +444,16 @@ def main() -> None:
|
||||
if vid.get("size") is not None
|
||||
}
|
||||
|
||||
newly_fetched: dict[str, int | None] = {}
|
||||
uncached_pending = [u for u in pending if u not in cached_sizes]
|
||||
session = make_session()
|
||||
if uncached_pending:
|
||||
print(
|
||||
f"\n[+] Fetching remote file sizes ({len(uncached_pending)} uncached, {len(pending) - len(uncached_pending)} cached)…"
|
||||
)
|
||||
remote_sizes: dict[str, int | None] = {
|
||||
**cached_sizes,
|
||||
**fetch_sizes(uncached_pending, workers=20, url_referers=url_referers),
|
||||
}
|
||||
fetched_pending = fetch_sizes(uncached_pending, workers=20, url_referers=url_referers)
|
||||
newly_fetched.update(fetched_pending)
|
||||
remote_sizes: dict[str, int | None] = {**cached_sizes, **fetched_pending}
|
||||
else:
|
||||
print(f"\n[+] All {len(pending)} pending sizes cached — skipping probe.")
|
||||
remote_sizes = dict(cached_sizes)
|
||||
@@ -447,10 +468,9 @@ def main() -> None:
|
||||
print(
|
||||
f"[+] Verifying {len(already)} existing files ({len(uncached_already)} uncached)…"
|
||||
)
|
||||
already_sizes: dict[str, int | None] = {
|
||||
**cached_sizes,
|
||||
**fetch_sizes(uncached_already, workers=20, url_referers=url_referers),
|
||||
}
|
||||
fetched_already = fetch_sizes(uncached_already, workers=20, url_referers=url_referers)
|
||||
newly_fetched.update(fetched_already)
|
||||
already_sizes: dict[str, int | None] = {**cached_sizes, **fetched_already}
|
||||
else:
|
||||
print(f"[+] Verifying {len(already)} existing files (all sizes cached)…")
|
||||
already_sizes = dict(cached_sizes)
|
||||
@@ -472,6 +492,9 @@ def main() -> None:
|
||||
if mismatched:
|
||||
print(f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch")
|
||||
|
||||
if newly_fetched:
|
||||
_persist_fetched_sizes(newly_fetched)
|
||||
|
||||
print(f"\n[⚡] Downloading with {args.workers} threads…\n")
|
||||
|
||||
completed = 0
|
||||
|
||||
@@ -14,10 +14,12 @@ import os
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from config import SITES
|
||||
|
||||
ENV_FILE = Path(".env")
|
||||
COOKIE_PREFIX = "wordpress_logged_in_"
|
||||
load_dotenv(dotenv_path=ENV_FILE)
|
||||
|
||||
|
||||
def update_env(
|
||||
@@ -72,6 +74,9 @@ def login_and_get_cookie(
|
||||
"Referer": f"{base_url}/",
|
||||
"Origin": base_url,
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:147.0) Gecko/20100101 Firefox/147.0",
|
||||
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
"Accept": "*/*",
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
@@ -59,7 +59,7 @@ def _is_stale(vid: dict[str, Any], now: int) -> bool:
|
||||
"""True if the cached size is absent or older than SIZE_CACHE_TTL seconds."""
|
||||
if vid.get("size") is None:
|
||||
return True
|
||||
return (now - vid.get("size_checked_at", 0)) >= SIZE_CACHE_TTL
|
||||
return (now - int(vid.get("size_checked_at", 0))) >= SIZE_CACHE_TTL
|
||||
|
||||
|
||||
# --------------- CLI ---------------
|
||||
|
||||
Reference in New Issue
Block a user