mirror of
https://github.com/HugeFrog24/jailbirdz-dl.git
synced 2026-03-02 01:04:31 +00:00
OK
This commit is contained in:
@@ -128,10 +128,11 @@ Lists filenames that map to more than one source URL, with sizes.
|
|||||||
### Estimate total download size
|
### Estimate total download size
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python total_size.py
|
python total_size.py # read cached sizes and print summary
|
||||||
|
python total_size.py --write # probe uncached/stale URLs and refresh the cache
|
||||||
```
|
```
|
||||||
|
|
||||||
Fetches `Content-Length` for every video URL in `video_map.json` and prints a size summary. Does not download anything.
|
Reads cached file sizes from `video_map.json` and prints a summary (total, smallest, largest, average). The default mode never hits the network. Use `--write` to probe any missing or stale entries and persist the results.
|
||||||
|
|
||||||
## Data files
|
## Data files
|
||||||
|
|
||||||
|
|||||||
39
download.py
39
download.py
@@ -18,6 +18,7 @@ from collections import defaultdict
|
|||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from typing import Any
|
from typing import Any
|
||||||
import requests
|
import requests
|
||||||
|
import time
|
||||||
|
|
||||||
from check_clashes import (
|
from check_clashes import (
|
||||||
make_session,
|
make_session,
|
||||||
@@ -28,6 +29,7 @@ from check_clashes import (
|
|||||||
build_url_referers,
|
build_url_referers,
|
||||||
fetch_sizes,
|
fetch_sizes,
|
||||||
load_video_map,
|
load_video_map,
|
||||||
|
save_video_map,
|
||||||
is_valid_url,
|
is_valid_url,
|
||||||
VIDEO_MAP_FILE,
|
VIDEO_MAP_FILE,
|
||||||
)
|
)
|
||||||
@@ -288,6 +290,25 @@ def build_url_title_map(video_map: dict[str, Any]) -> dict[str, str]:
|
|||||||
return url_title
|
return url_title
|
||||||
|
|
||||||
|
|
||||||
|
def _persist_fetched_sizes(newly_fetched: dict[str, int | None]) -> None:
|
||||||
|
"""Write newly probed sizes back to video_map.json (successful probes only)."""
|
||||||
|
now = int(time.time())
|
||||||
|
for site_key in SITES:
|
||||||
|
vm_site = load_video_map(site_key)
|
||||||
|
changed = False
|
||||||
|
for entry in vm_site.values():
|
||||||
|
for vid in entry.get("videos", []):
|
||||||
|
if vid["url"] in newly_fetched and vid.get("size") is None and newly_fetched[vid["url"]] is not None:
|
||||||
|
vid["size"] = newly_fetched[vid["url"]]
|
||||||
|
vid["size_checked_at"] = now
|
||||||
|
changed = True
|
||||||
|
if changed:
|
||||||
|
save_video_map(vm_site, site_key)
|
||||||
|
n_saved = sum(1 for s in newly_fetched.values() if s is not None)
|
||||||
|
if n_saved:
|
||||||
|
print(f"[+] Cached {n_saved} newly probed size(s).")
|
||||||
|
|
||||||
|
|
||||||
def build_url_to_site() -> dict[str, str]:
|
def build_url_to_site() -> dict[str, str]:
|
||||||
"""Return {cdn_video_url: site_key} by loading each site's map in turn."""
|
"""Return {cdn_video_url: site_key} by loading each site's map in turn."""
|
||||||
result: dict[str, str] = {}
|
result: dict[str, str] = {}
|
||||||
@@ -423,16 +444,16 @@ def main() -> None:
|
|||||||
if vid.get("size") is not None
|
if vid.get("size") is not None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
newly_fetched: dict[str, int | None] = {}
|
||||||
uncached_pending = [u for u in pending if u not in cached_sizes]
|
uncached_pending = [u for u in pending if u not in cached_sizes]
|
||||||
session = make_session()
|
session = make_session()
|
||||||
if uncached_pending:
|
if uncached_pending:
|
||||||
print(
|
print(
|
||||||
f"\n[+] Fetching remote file sizes ({len(uncached_pending)} uncached, {len(pending) - len(uncached_pending)} cached)…"
|
f"\n[+] Fetching remote file sizes ({len(uncached_pending)} uncached, {len(pending) - len(uncached_pending)} cached)…"
|
||||||
)
|
)
|
||||||
remote_sizes: dict[str, int | None] = {
|
fetched_pending = fetch_sizes(uncached_pending, workers=20, url_referers=url_referers)
|
||||||
**cached_sizes,
|
newly_fetched.update(fetched_pending)
|
||||||
**fetch_sizes(uncached_pending, workers=20, url_referers=url_referers),
|
remote_sizes: dict[str, int | None] = {**cached_sizes, **fetched_pending}
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
print(f"\n[+] All {len(pending)} pending sizes cached — skipping probe.")
|
print(f"\n[+] All {len(pending)} pending sizes cached — skipping probe.")
|
||||||
remote_sizes = dict(cached_sizes)
|
remote_sizes = dict(cached_sizes)
|
||||||
@@ -447,10 +468,9 @@ def main() -> None:
|
|||||||
print(
|
print(
|
||||||
f"[+] Verifying {len(already)} existing files ({len(uncached_already)} uncached)…"
|
f"[+] Verifying {len(already)} existing files ({len(uncached_already)} uncached)…"
|
||||||
)
|
)
|
||||||
already_sizes: dict[str, int | None] = {
|
fetched_already = fetch_sizes(uncached_already, workers=20, url_referers=url_referers)
|
||||||
**cached_sizes,
|
newly_fetched.update(fetched_already)
|
||||||
**fetch_sizes(uncached_already, workers=20, url_referers=url_referers),
|
already_sizes: dict[str, int | None] = {**cached_sizes, **fetched_already}
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
print(f"[+] Verifying {len(already)} existing files (all sizes cached)…")
|
print(f"[+] Verifying {len(already)} existing files (all sizes cached)…")
|
||||||
already_sizes = dict(cached_sizes)
|
already_sizes = dict(cached_sizes)
|
||||||
@@ -472,6 +492,9 @@ def main() -> None:
|
|||||||
if mismatched:
|
if mismatched:
|
||||||
print(f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch")
|
print(f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch")
|
||||||
|
|
||||||
|
if newly_fetched:
|
||||||
|
_persist_fetched_sizes(newly_fetched)
|
||||||
|
|
||||||
print(f"\n[⚡] Downloading with {args.workers} threads…\n")
|
print(f"\n[⚡] Downloading with {args.workers} threads…\n")
|
||||||
|
|
||||||
completed = 0
|
completed = 0
|
||||||
|
|||||||
@@ -14,10 +14,12 @@ import os
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
import requests
|
import requests
|
||||||
|
from dotenv import load_dotenv
|
||||||
from config import SITES
|
from config import SITES
|
||||||
|
|
||||||
ENV_FILE = Path(".env")
|
ENV_FILE = Path(".env")
|
||||||
COOKIE_PREFIX = "wordpress_logged_in_"
|
COOKIE_PREFIX = "wordpress_logged_in_"
|
||||||
|
load_dotenv(dotenv_path=ENV_FILE)
|
||||||
|
|
||||||
|
|
||||||
def update_env(
|
def update_env(
|
||||||
@@ -72,6 +74,9 @@ def login_and_get_cookie(
|
|||||||
"Referer": f"{base_url}/",
|
"Referer": f"{base_url}/",
|
||||||
"Origin": base_url,
|
"Origin": base_url,
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:147.0) Gecko/20100101 Firefox/147.0",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:147.0) Gecko/20100101 Firefox/147.0",
|
||||||
|
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
||||||
|
"X-Requested-With": "XMLHttpRequest",
|
||||||
|
"Accept": "*/*",
|
||||||
},
|
},
|
||||||
timeout=30,
|
timeout=30,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ def _is_stale(vid: dict[str, Any], now: int) -> bool:
|
|||||||
"""True if the cached size is absent or older than SIZE_CACHE_TTL seconds."""
|
"""True if the cached size is absent or older than SIZE_CACHE_TTL seconds."""
|
||||||
if vid.get("size") is None:
|
if vid.get("size") is None:
|
||||||
return True
|
return True
|
||||||
return (now - vid.get("size_checked_at", 0)) >= SIZE_CACHE_TTL
|
return (now - int(vid.get("size_checked_at", 0))) >= SIZE_CACHE_TTL
|
||||||
|
|
||||||
|
|
||||||
# --------------- CLI ---------------
|
# --------------- CLI ---------------
|
||||||
|
|||||||
Reference in New Issue
Block a user