Write-through cache support

This commit is contained in:
HugeFrog24
2026-03-01 13:44:31 +01:00
parent 27f7beb8b7
commit de10d66650
2 changed files with 32 additions and 9 deletions

View File

@@ -18,6 +18,7 @@ from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any from typing import Any
import requests import requests
import time
from check_clashes import ( from check_clashes import (
make_session, make_session,
@@ -28,6 +29,7 @@ from check_clashes import (
build_url_referers, build_url_referers,
fetch_sizes, fetch_sizes,
load_video_map, load_video_map,
save_video_map,
is_valid_url, is_valid_url,
VIDEO_MAP_FILE, VIDEO_MAP_FILE,
) )
@@ -288,6 +290,25 @@ def build_url_title_map(video_map: dict[str, Any]) -> dict[str, str]:
return url_title return url_title
def _persist_fetched_sizes(newly_fetched: dict[str, int | None]) -> None:
    """Write newly probed sizes back to video_map.json (successful probes only)."""
    stamp = int(time.time())
    for site_key in SITES:
        site_map = load_video_map(site_key)
        dirty = False
        for record in site_map.values():
            for video in record.get("videos", []):
                probed = newly_fetched.get(video["url"])
                # Only fill gaps: skip URLs we did not probe, failed probes
                # (None result), and entries that already carry a size.
                if probed is None or video.get("size") is not None:
                    continue
                video["size"] = probed
                video["size_checked_at"] = stamp
                dirty = True
        # Avoid rewriting a site's map file when nothing in it changed.
        if dirty:
            save_video_map(site_map, site_key)
    cached_count = sum(1 for size in newly_fetched.values() if size is not None)
    if cached_count:
        print(f"[+] Cached {cached_count} newly probed size(s).")
def build_url_to_site() -> dict[str, str]: def build_url_to_site() -> dict[str, str]:
"""Return {cdn_video_url: site_key} by loading each site's map in turn.""" """Return {cdn_video_url: site_key} by loading each site's map in turn."""
result: dict[str, str] = {} result: dict[str, str] = {}
@@ -423,16 +444,16 @@ def main() -> None:
if vid.get("size") is not None if vid.get("size") is not None
} }
newly_fetched: dict[str, int | None] = {}
uncached_pending = [u for u in pending if u not in cached_sizes] uncached_pending = [u for u in pending if u not in cached_sizes]
session = make_session() session = make_session()
if uncached_pending: if uncached_pending:
print( print(
f"\n[+] Fetching remote file sizes ({len(uncached_pending)} uncached, {len(pending) - len(uncached_pending)} cached)…" f"\n[+] Fetching remote file sizes ({len(uncached_pending)} uncached, {len(pending) - len(uncached_pending)} cached)…"
) )
remote_sizes: dict[str, int | None] = { fetched_pending = fetch_sizes(uncached_pending, workers=20, url_referers=url_referers)
**cached_sizes, newly_fetched.update(fetched_pending)
**fetch_sizes(uncached_pending, workers=20, url_referers=url_referers), remote_sizes: dict[str, int | None] = {**cached_sizes, **fetched_pending}
}
else: else:
print(f"\n[+] All {len(pending)} pending sizes cached — skipping probe.") print(f"\n[+] All {len(pending)} pending sizes cached — skipping probe.")
remote_sizes = dict(cached_sizes) remote_sizes = dict(cached_sizes)
@@ -447,10 +468,9 @@ def main() -> None:
print( print(
f"[+] Verifying {len(already)} existing files ({len(uncached_already)} uncached)…" f"[+] Verifying {len(already)} existing files ({len(uncached_already)} uncached)…"
) )
already_sizes: dict[str, int | None] = { fetched_already = fetch_sizes(uncached_already, workers=20, url_referers=url_referers)
**cached_sizes, newly_fetched.update(fetched_already)
**fetch_sizes(uncached_already, workers=20, url_referers=url_referers), already_sizes: dict[str, int | None] = {**cached_sizes, **fetched_already}
}
else: else:
print(f"[+] Verifying {len(already)} existing files (all sizes cached)…") print(f"[+] Verifying {len(already)} existing files (all sizes cached)…")
already_sizes = dict(cached_sizes) already_sizes = dict(cached_sizes)
@@ -472,6 +492,9 @@ def main() -> None:
if mismatched: if mismatched:
print(f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch") print(f"[!] {mismatched} file(s) will be re-downloaded due to size mismatch")
if newly_fetched:
_persist_fetched_sizes(newly_fetched)
print(f"\n[⚡] Downloading with {args.workers} threads…\n") print(f"\n[⚡] Downloading with {args.workers} threads…\n")
completed = 0 completed = 0

View File

@@ -59,7 +59,7 @@ def _is_stale(vid: dict[str, Any], now: int) -> bool:
"""True if the cached size is absent or older than SIZE_CACHE_TTL seconds.""" """True if the cached size is absent or older than SIZE_CACHE_TTL seconds."""
if vid.get("size") is None: if vid.get("size") is None:
return True return True
return (now - vid.get("size_checked_at", 0)) >= SIZE_CACHE_TTL return (now - int(vid.get("size_checked_at", 0))) >= SIZE_CACHE_TTL
# --------------- CLI --------------- # --------------- CLI ---------------