# Mirror of https://github.com/HugeFrog24/jailbirdz-dl.git
# Synced 2026-03-02 09:04:33 +00:00
"""Calculate total disk space needed to download all videos.
|
|
|
|
Importable function:
|
|
summarize_sizes(sizes) - return dict with total, smallest, largest, average, failed
|
|
"""
|
|
|
|
import argparse
|
|
import time
|
|
from typing import Any, TypedDict
|
|
|
|
from check_clashes import (
|
|
fmt_size,
|
|
fetch_sizes,
|
|
load_video_map,
|
|
save_video_map,
|
|
build_url_referers,
|
|
VIDEO_MAP_FILE,
|
|
)
|
|
from config import SITES, SIZE_CACHE_TTL
|
|
|
|
|
|
class SizeStats(TypedDict):
    """Aggregate size statistics for a set of video URLs."""

    sized: int  # how many URLs have a known size
    total: int  # how many URLs were considered in total
    total_bytes: int  # sum of all known sizes, in bytes
    smallest: int  # smallest known size (0 when nothing is sized)
    largest: int  # largest known size (0 when nothing is sized)
    average: int  # integer mean of known sizes (0 when nothing is sized)
    failed: list[str]  # URLs whose size could not be determined
|
|
|
|
|
|
def summarize_sizes(sizes: dict[str, int | None]) -> SizeStats:
    """Given {url: size_or_None}, return a stats dict."""
    failed = [url for url, size in sizes.items() if size is None]
    sized_values = [size for size in sizes.values() if size is not None]

    # Nothing sized yet: report zeros but still surface the failures.
    if not sized_values:
        return {
            "sized": 0,
            "total": len(sizes),
            "total_bytes": 0,
            "smallest": 0,
            "largest": 0,
            "average": 0,
            "failed": failed,
        }

    grand_total = sum(sized_values)
    return {
        "sized": len(sized_values),
        "total": len(sizes),
        "total_bytes": grand_total,
        "smallest": min(sized_values),
        "largest": max(sized_values),
        # Integer division keeps the stats dict all-int.
        "average": grand_total // len(sized_values),
        "failed": failed,
    }
|
|
|
|
|
|
def _is_stale(vid: dict[str, Any], now: int) -> bool:
    """True if the cached size is absent or older than SIZE_CACHE_TTL seconds."""
    if vid.get("size") is None:
        # Never probed (or probe failed): always counts as stale.
        return True
    checked_at = int(vid.get("size_checked_at", 0))
    age = now - checked_at
    return age >= SIZE_CACHE_TTL
|
|
|
|
|
|
# --------------- CLI ---------------
|
|
|
|
|
|
def _progress(done: int, total: int) -> None:
    """Print a progress line every 200 items and once more at completion."""
    hit_milestone = done % 200 == 0
    if hit_milestone or done == total:
        print(f"  {done}/{total}")
|
|
|
|
|
|
def _print_stats(stats: SizeStats) -> None:
    """Pretty-print a SizeStats summary, then list any URLs that failed to size."""
    print(f"\n{'=' * 45}")
    print(f"  Sized:    {stats['sized']}/{stats['total']} files")
    print(f"  Total:    {fmt_size(stats['total_bytes'])}")
    print(f"  Smallest: {fmt_size(stats['smallest'])}")
    print(f"  Largest:  {fmt_size(stats['largest'])}")
    print(f"  Average:  {fmt_size(stats['average'])}")
    print(f"{'=' * 45}")
    # Failures are reported after the summary so the totals stay readable.
    if stats["failed"]:
        print(f"\n[!] {len(stats['failed'])} URL(s) could not be sized:")
        for u in stats["failed"]:
            print(f"      {u}")
|
|
|
|
|
|
def _cache_hint(fresh: int, stale: int, missing: int) -> str:
    """Build a one-line cache summary with an action hint for the user."""
    labels: list[str] = []
    if fresh:
        labels.append(f"{fresh} fresh")
    if stale:
        labels.append(f"{stale} stale")
    if missing:
        labels.append(f"{missing} missing")

    # Stale entries take priority over missing ones in the hint text.
    if stale:
        suffix = " — run --write to refresh"
    elif missing:
        suffix = " — run --write to probe missing"
    else:
        suffix = " — all current"
    return f"Cache: {', '.join(labels)}{suffix}"
|
|
|
|
|
|
def _run_stats() -> None:
    """Summarize cached sizes from the video map without probing the network."""
    video_map = load_video_map()
    now = int(time.time())
    per_url: dict[str, int | None] = {}
    fresh = stale = missing = 0

    for record in video_map.values():
        for video in record.get("videos", []):
            url = video["url"]
            if url in per_url:
                # Each unique URL is counted exactly once.
                continue
            cached_size = video.get("size")
            per_url[url] = cached_size
            if cached_size is None:
                missing += 1
            elif _is_stale(video, now):
                stale += 1
            else:
                fresh += 1

    print(f"[+] {len(per_url)} URLs in {VIDEO_MAP_FILE}")
    print(f"    {_cache_hint(fresh, stale, missing)}")
    _print_stats(summarize_sizes(per_url))
|
|
|
|
|
|
def _apply_fetched(vm: dict[str, Any], fetched: dict[str, int | None], now: int) -> None:
|
|
for entry in vm.values():
|
|
for vid in entry.get("videos", []):
|
|
if vid["url"] in fetched:
|
|
vid["size"] = fetched[vid["url"]]
|
|
vid["size_checked_at"] = now
|
|
|
|
|
|
def _run_write() -> None:
    """Probe uncached sizes and write them into video_map.json."""
    now = int(time.time())
    # Accumulates every freshly probed {url: size} across all sites for the final report.
    all_fetched: dict[str, int | None] = {}

    for site_key in SITES:
        vm = load_video_map(site_key)
        if not vm:
            # No map for this site yet; nothing to probe or save.
            continue

        # Per-URL referer headers — presumably required by some hosts; see build_url_referers.
        url_referers = build_url_referers(vm)

        # Only probe entries whose cached size is absent or expired.
        to_probe: list[str] = [
            vid["url"]
            for entry in vm.values()
            for vid in entry.get("videos", [])
            if _is_stale(vid, now)
        ]
        cached_count = sum(
            1
            for entry in vm.values()
            for vid in entry.get("videos", [])
            if not _is_stale(vid, now)
        )
        print(f"[{site_key}] {cached_count} cached, {len(to_probe)} to probe…")

        fetched: dict[str, int | None] = {}
        if to_probe:
            fetched = fetch_sizes(
                to_probe, workers=20, on_progress=_progress, url_referers=url_referers
            )

        # Persist the refreshed sizes for this site before moving to the next one,
        # so a crash mid-run does not lose already-probed results.
        _apply_fetched(vm, fetched, now)
        save_video_map(vm, site_key)
        all_fetched.update(fetched)
        print(f"[{site_key}] Written.")

    # Only print a summary if at least one URL was actually probed.
    if all_fetched:
        _print_stats(summarize_sizes(all_fetched))
|
|
|
|
|
|
def main() -> None:
    """CLI entry point: show cached stats, or probe and persist with --write."""
    parser = argparse.ArgumentParser(description="Calculate total video download size")
    parser.add_argument(
        "--write",
        "-w",
        action="store_true",
        help="Probe uncached sizes and write them into video_map.json",
    )
    options = parser.parse_args()

    action = _run_write if options.write else _run_stats
    action()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|