mirror of
https://github.com/HugeFrog24/jailbirdz-dl.git
synced 2026-03-02 01:04:31 +00:00
Caching added
This commit is contained in:
132
total_size.py
132
total_size.py
@@ -4,15 +4,19 @@ Importable function:
|
||||
summarize_sizes(sizes) - return dict with total, smallest, largest, average, failed
|
||||
"""
|
||||
|
||||
from typing import Optional, TypedDict
|
||||
import argparse
|
||||
import time
|
||||
from typing import Any, TypedDict
|
||||
|
||||
from check_clashes import (
|
||||
fmt_size,
|
||||
fetch_sizes,
|
||||
load_video_map,
|
||||
save_video_map,
|
||||
build_url_referers,
|
||||
VIDEO_MAP_FILE,
|
||||
)
|
||||
from config import SITES, SIZE_CACHE_TTL
|
||||
|
||||
|
||||
class SizeStats(TypedDict):
|
||||
@@ -25,7 +29,7 @@ class SizeStats(TypedDict):
|
||||
failed: list[str]
|
||||
|
||||
|
||||
def summarize_sizes(sizes: dict[str, Optional[int]]) -> SizeStats:
|
||||
def summarize_sizes(sizes: dict[str, int | None]) -> SizeStats:
|
||||
"""Given {url: size_or_None}, return a stats dict."""
|
||||
known = {u: s for u, s in sizes.items() if s is not None}
|
||||
failed = [u for u, s in sizes.items() if s is None]
|
||||
@@ -51,6 +55,13 @@ def summarize_sizes(sizes: dict[str, Optional[int]]) -> SizeStats:
|
||||
}
|
||||
|
||||
|
||||
def _is_stale(vid: dict[str, Any], now: int) -> bool:
    """True if the cached size is absent or older than SIZE_CACHE_TTL seconds.

    A record with no "size" value is always considered stale; otherwise the
    age is measured against its "size_checked_at" timestamp (0 if missing,
    i.e. never checked).
    """
    if vid.get("size") is None:
        return True
    checked_at = vid.get("size_checked_at", 0)
    return now - checked_at >= SIZE_CACHE_TTL
|
||||
|
||||
|
||||
# --------------- CLI ---------------
|
||||
|
||||
|
||||
@@ -59,24 +70,7 @@ def _progress(done: int, total: int) -> None:
|
||||
print(f" {done}/{total}")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
vm = load_video_map()
|
||||
urls: list[str] = [
|
||||
u
|
||||
for entry in vm.values()
|
||||
for u in entry.get("videos", [])
|
||||
if u.startswith("http")
|
||||
]
|
||||
|
||||
url_referers = build_url_referers(vm)
|
||||
print(f"[+] {len(urls)} URLs in {VIDEO_MAP_FILE}")
|
||||
print("[+] Fetching file sizes (20 threads)…\n")
|
||||
|
||||
sizes = fetch_sizes(
|
||||
urls, workers=20, on_progress=_progress, url_referers=url_referers
|
||||
)
|
||||
stats = summarize_sizes(sizes)
|
||||
|
||||
def _print_stats(stats: SizeStats) -> None:
|
||||
print(f"\n{'=' * 45}")
|
||||
print(f" Sized: {stats['sized']}/{stats['total']} files")
|
||||
print(f" Total: {fmt_size(stats['total_bytes'])}")
|
||||
@@ -84,12 +78,108 @@ def main() -> None:
|
||||
print(f" Largest: {fmt_size(stats['largest'])}")
|
||||
print(f" Average: {fmt_size(stats['average'])}")
|
||||
print(f"{'=' * 45}")
|
||||
|
||||
if stats["failed"]:
|
||||
print(f"\n[!] {len(stats['failed'])} URL(s) could not be sized:")
|
||||
for u in stats["failed"]:
|
||||
print(f" {u}")
|
||||
|
||||
|
||||
def _cache_hint(fresh: int, stale: int, missing: int) -> str:
|
||||
parts = [label for count, label in [(fresh, f"{fresh} fresh"), (stale, f"{stale} stale"), (missing, f"{missing} missing")] if count]
|
||||
if stale or missing:
|
||||
suffix = " — run --write to refresh" if stale else " — run --write to probe missing"
|
||||
else:
|
||||
suffix = " — all current"
|
||||
return f"Cache: {', '.join(parts)}{suffix}"
|
||||
|
||||
|
||||
def _run_stats() -> None:
    """Report cache freshness and size statistics from the stored video map.

    Reads the video map, tallies each distinct URL once as fresh / stale /
    missing, then prints the URL count, a cache hint, and the size summary.
    Performs no network probing.
    """
    vm = load_video_map()
    now = int(time.time())
    sizes: dict[str, int | None] = {}
    fresh = stale = missing = 0

    for entry in vm.values():
        for vid in entry.get("videos", []):
            url = vid["url"]
            if url in sizes:
                continue  # each distinct URL is counted once
            size = vid.get("size")
            sizes[url] = size
            if size is None:
                missing += 1
            elif _is_stale(vid, now):
                stale += 1
            else:
                fresh += 1

    print(f"[+] {len(sizes)} URLs in {VIDEO_MAP_FILE}")
    print(f" {_cache_hint(fresh, stale, missing)}")
    _print_stats(summarize_sizes(sizes))
|
||||
|
||||
|
||||
def _apply_fetched(vm: dict[str, Any], fetched: dict[str, int | None], now: int) -> None:
|
||||
for entry in vm.values():
|
||||
for vid in entry.get("videos", []):
|
||||
if vid["url"] in fetched:
|
||||
vid["size"] = fetched[vid["url"]]
|
||||
vid["size_checked_at"] = now
|
||||
|
||||
|
||||
def _run_write() -> None:
    """Probe uncached sizes and write them into video_map.json."""
    now = int(time.time())
    all_fetched: dict[str, int | None] = {}

    for site_key in SITES:
        vm = load_video_map(site_key)
        if not vm:
            continue

        url_referers = build_url_referers(vm)

        # Single pass: split records into already-cached vs. needing a probe.
        to_probe: list[str] = []
        cached_count = 0
        for entry in vm.values():
            for vid in entry.get("videos", []):
                if _is_stale(vid, now):
                    to_probe.append(vid["url"])
                else:
                    cached_count += 1
        print(f"[{site_key}] {cached_count} cached, {len(to_probe)} to probe…")

        fetched: dict[str, int | None] = {}
        if to_probe:
            fetched = fetch_sizes(
                to_probe, workers=20, on_progress=_progress, url_referers=url_referers
            )

        _apply_fetched(vm, fetched, now)
        save_video_map(vm, site_key)
        all_fetched.update(fetched)
        print(f"[{site_key}] Written.")

    if all_fetched:
        _print_stats(summarize_sizes(all_fetched))
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: show cached size stats, or probe-and-write with --write."""
    parser = argparse.ArgumentParser(description="Calculate total video download size")
    parser.add_argument(
        "--write",
        "-w",
        action="store_true",
        help="Probe uncached sizes and write them into video_map.json",
    )
    args = parser.parse_args()

    # Dispatch to the write path or the read-only stats path.
    action = _run_write if args.write else _run_stats
    action()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user