"""Batch downloader: fetch every URL listed in config.yaml into downloads/."""
import os
from email.message import Message
from pathlib import Path
from urllib.parse import unquote, urlparse

import requests
import yaml
from tqdm import tqdm
|
|
|
|
# YAML file that lists the downloads to perform.
CONFIG_PATH: str = "config.yaml"

# Root directory under which every downloaded file is stored.
BASE_DOWNLOAD_DIR: Path = Path("downloads")
|
|
|
|
|
|
def load_config(path: str) -> list:
    """Load the list of download entries from a YAML config file.

    Two layouts are accepted: a top-level list of entries, or a mapping whose
    ``items`` key holds that list.

    Args:
        path: Location of the YAML file.

    Returns:
        The list of entries exactly as parsed from the file.

    Raises:
        ValueError: If the document is neither a list nor a mapping with an
            ``items`` list (an empty file ends up here as well).
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # Unwrap the ``items`` layout so both layouts share one validation step.
    if isinstance(data, dict) and "items" in data:
        data = data["items"]

    if isinstance(data, list):
        return data

    raise ValueError("Format config.yaml tidak dikenali")
|
|
|
|
|
|
def get_filename_from_url(url: str) -> str:
    """Derive a local file name from the path component of *url*.

    Query string and fragment are ignored. Percent-escapes in the last path
    segment are decoded, so ``my%20file.zip`` becomes ``my file.zip``.

    Args:
        url: The URL the file will be downloaded from.

    Returns:
        The decoded last path segment, or ``"downloaded_file"`` when the URL
        has no usable one (empty path, trailing slash, ``.`` or ``..``).
    """
    last_segment = os.path.basename(urlparse(url).path)

    # Decoding can reveal an escaped separator (``%2F``), so take the base
    # name once more to guarantee the result is a single path component.
    name = os.path.basename(unquote(last_segment))

    if name in ("", ".", ".."):
        return "downloaded_file"
    return name
|
|
|
|
|
|
def build_filename(url: str, name_from_config: str | None) -> str:
    """Decide the final file name for a download.

    Rules:
    - no name in the config -> use the name taken from the URL
    - config name has NO extension -> config name + extension from the URL
    - config name already has an extension -> use it unchanged

    Args:
        url: The URL the file will be downloaded from.
        name_from_config: Optional name from the config entry. Non-string
            scalars (YAML parses ``name: 2024`` as an int) are converted
            with ``str()``.

    Returns:
        The file name to save the download under.
    """
    url_name = get_filename_from_url(url)

    cfg_name = "" if name_from_config is None else str(name_from_config)
    if not cfg_name:
        return url_name

    cfg_root, cfg_ext = os.path.splitext(cfg_name)
    url_ext = os.path.splitext(url_name)[1]

    # No extension in the config name: borrow the one from the URL, if any.
    if not cfg_ext and url_ext:
        return cfg_root + url_ext

    # The config name already carries an extension (or the URL has none).
    return cfg_name
|
|
|
|
|
|
def maybe_get_filename_from_headers(response: "requests.Response", fallback: str) -> str:
    """Return the file name announced in ``Content-Disposition``, if any.

    Args:
        response: The HTTP response; only ``response.headers`` is read.
        fallback: Name to return when the header is absent, carries no
            usable file name, or cannot be parsed.

    Returns:
        The base name taken from the header's ``filename`` / ``filename*``
        parameter, or *fallback*.
    """
    cd = response.headers.get("content-disposition")
    if not cd:
        return fallback

    # Use the stdlib header parser instead of splitting on ";": it copes
    # with semicolons inside quoted values and with RFC 2231 encoded
    # parameters (``filename*=UTF-8''...``).
    header = Message()
    header["content-disposition"] = cd
    try:
        filename = header.get_filename()
    except (TypeError, ValueError, LookupError, UnicodeError):
        # Malformed parameters: using the header is best-effort only.
        return fallback
    if not filename:
        return fallback

    # The value is server-controlled. Keep only the last path component
    # (for both "/" and "\" separators) so it cannot escape the target dir.
    safe_name = os.path.basename(filename.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        return fallback
    return safe_name
|
|
|
|
|
|
def download_file(
    url: str,
    target_dir: Path,
    name_from_config: str | None = None,
    *,
    timeout: float | tuple[float, float] = (10, 60),
) -> None:
    """Download *url* into *target_dir*, skipping files that already exist.

    The data is streamed into a ``<name>.part`` file that is renamed to the
    final name only after the transfer completes, so an interrupted download
    never leaves a truncated file that later runs would skip.

    Args:
        url: Address of the file to fetch.
        target_dir: Directory to save into; created if missing.
        name_from_config: Optional file name from the config. When set, it
            takes precedence over any name announced by the server.
        timeout: Passed to ``requests.get``; seconds, or a
            ``(connect, read)`` pair.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status.
        OSError: If the file cannot be written.
    """
    target_dir.mkdir(parents=True, exist_ok=True)

    # Initial name, derived from the URL and the config entry.
    planned_name = build_filename(url, name_from_config)
    dest_path = target_dir / planned_name

    if dest_path.exists():
        print(f"[SKIP] File already exists: {dest_path}")
        return

    print(f"[INFO] Downloading: {url}")
    # The context manager releases the streamed connection on every exit path.
    with requests.get(url, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()

        # A name announced by the server overrides the planned one, but only
        # when the user did NOT set 'name' in the config.
        if name_from_config:
            filename = planned_name
        else:
            # Server-supplied names are untrusted: keep the base name only.
            candidate = Path(maybe_get_filename_from_headers(resp, planned_name)).name
            filename = candidate if candidate not in ("", "..") else planned_name

        dest_path = target_dir / filename
        # The header may have changed the target, so check existence again.
        if filename != planned_name and dest_path.exists():
            print(f"[SKIP] File already exists: {dest_path}")
            return

        try:
            total_size = int(resp.headers.get("content-length", 0))
        except ValueError:
            # Unparseable Content-Length: show a progress bar without a total.
            total_size = 0

        part_path = dest_path.with_name(dest_path.name + ".part")
        try:
            with open(part_path, "wb") as f, tqdm(
                total=total_size if total_size > 0 else None,
                unit="B",
                unit_scale=True,
                desc=str(filename),
            ) as progress:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        progress.update(len(chunk))
            part_path.replace(dest_path)
        except BaseException:
            # Also covers Ctrl-C: never leave a partial file behind.
            part_path.unlink(missing_ok=True)
            raise

    print(f"[OK] Saved to: {dest_path}")
|
|
|
|
|
|
def main() -> None:
    """Download every entry listed in the config file.

    Each entry is a mapping with a required ``url`` and optional ``folder``
    (sub-directory of ``BASE_DOWNLOAD_DIR``) and ``name`` keys. A failed
    download is reported and the remaining entries are still processed.

    Raises:
        SystemExit: With status 1 when at least one download failed.
    """
    print(f"[INFO] Loading config from {CONFIG_PATH}")
    entries = load_config(CONFIG_PATH)

    failed = 0
    for entry in entries:
        # Entries that are not mappings cannot carry a URL either.
        url = entry.get("url") if isinstance(entry, dict) else None
        if not url:
            print("[WARN] Entry tanpa URL, di-skip:", entry)
            continue

        # A bare ``folder:`` key is parsed as None; treat it like "no folder".
        folder = entry.get("folder") or ""
        name = entry.get("name")  # may be None

        target_dir = BASE_DOWNLOAD_DIR / str(folder)
        try:
            download_file(url, target_dir, name_from_config=name)
        except (requests.RequestException, OSError) as exc:
            # One bad URL must not abort the rest of the batch.
            failed += 1
            print(f"[ERROR] Failed to download {url}: {exc}")

    if failed:
        print(f"[WARN] {failed} download(s) failed.")
        raise SystemExit(1)

    print("[DONE] Semua download selesai.")
|
|
|
|
|
|
# Script entry point: start the batch download only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|