Files
image-uploader/uploader.py

136 lines
3.7 KiB
Python

import os
from pathlib import Path
from urllib.parse import urlparse
import requests
import yaml
from tqdm import tqdm
# YAML file that lists the downloads; a relative path, so it is resolved
# against the current working directory when opened.
CONFIG_PATH = "config.yaml"
# Root directory under which every downloaded file is stored.
BASE_DOWNLOAD_DIR = Path("downloads")
def load_config(path: str):
    """Load the download entries from a YAML config file.

    Two document layouts are accepted:

    - a top-level list of entries, returned as-is;
    - a top-level mapping with an ``items`` key, whose value is returned.

    Args:
        path: Location of the YAML file.

    Returns:
        The entries exactly as parsed by ``yaml.safe_load``.

    Raises:
        ValueError: If the document matches neither layout (this includes an
            empty file, which parses to ``None``).
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so non-ASCII names in the config are read the
    # same way on every platform instead of depending on the locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    if isinstance(data, list):
        return data
    if isinstance(data, dict) and "items" in data:
        return data["items"]
    raise ValueError("Format config.yaml tidak dikenali")
def get_filename_from_url(url: str) -> str:
    """Return the last path segment of *url*.

    Query string and fragment are ignored. When the path has no final segment
    (no path at all, or a trailing slash) the placeholder
    ``"downloaded_file"`` is returned instead.
    """
    url_path = urlparse(url).path
    return os.path.basename(url_path) or "downloaded_file"
def build_filename(url: str, name_from_config: str | None) -> str:
    """
    Decide the final file name:
    - if the configured name is empty -> use the name taken from the URL
    - if the configured name already has an extension -> use it unchanged
    - if the configured name has NO extension -> append the URL's extension
      (when the URL has one; otherwise the configured name is used unchanged)
    """
    name_from_url = get_filename_from_url(url)
    if not name_from_config:
        return name_from_url

    stem, suffix = os.path.splitext(name_from_config)
    if suffix:
        # The user spelled out an extension; respect it.
        return name_from_config

    # No extension in the configured name: borrow the URL's one if present.
    url_suffix = os.path.splitext(name_from_url)[1]
    if url_suffix:
        return stem + url_suffix
    return name_from_config
def maybe_get_filename_from_headers(response: "requests.Response", fallback: str) -> str:
    """Pick a file name from the response's ``Content-Disposition`` header.

    The header is split on ``;`` and the first ``filename=...`` parameter with
    a usable value wins. Surrounding whitespace and double quotes are removed.

    Because the header is controlled by the remote server, the value is
    reduced to its last path component (both ``/`` and ``\\`` count as
    separators), and the names ``""``, ``"."`` and ``".."`` are rejected, so
    the result can never point outside the directory it is later joined to.

    Args:
        response: Object exposing a ``headers`` mapping with a ``get`` method.
        fallback: Name to return when the header is absent or unusable.

    Returns:
        The sanitized file name from the header, or *fallback*.
    """
    cd = response.headers.get("content-disposition")
    if not cd:
        return fallback

    for part in cd.split(";"):
        key, sep, value = part.partition("=")
        # Compare the stripped key so "filename = x" is accepted as well;
        # "filename*" (RFC 5987 encoded form) is deliberately not matched.
        if not sep or key.strip().lower() != "filename":
            continue
        filename = value.strip().strip('"')
        # Drop any directory part a hostile or sloppy server may have sent.
        filename = filename.replace("\\", "/").rsplit("/", 1)[-1]
        if filename and filename not in (".", ".."):
            return filename
    return fallback
def download_file(
    url: str,
    target_dir: Path,
    name_from_config: str | None = None,
    timeout: float = 30.0,
):
    """Download *url* into *target_dir*, skipping files that already exist.

    The file name is planned from the URL and the optional configured name.
    When no name was configured, a name supplied by the server in the
    ``Content-Disposition`` header takes precedence over the planned one.

    Data is streamed into ``<name>.part`` and renamed to the final name only
    after the whole body was written. An interrupted download therefore never
    leaves a truncated file that a later run would mistake for a finished one.

    Args:
        url: Address to fetch.
        target_dir: Directory to store the file in; created if missing.
        name_from_config: Optional file name chosen by the user.
        timeout: Seconds to wait for the connection and for each read before
            giving up, passed straight to ``requests.get``.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-success HTTP status.
        OSError: If the file cannot be written.
    """
    target_dir.mkdir(parents=True, exist_ok=True)

    # Initial name, derived from the URL and the configured name.
    planned_name = build_filename(url, name_from_config)
    dest_path = target_dir / planned_name

    # Cheap early exit before any network traffic.
    if dest_path.exists():
        print(f"[SKIP] File already exists: {dest_path}")
        return

    print(f"[INFO] Downloading: {url}")
    # The context manager releases the streamed connection on every exit path.
    with requests.get(url, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()

        # A server-provided name overrides the planned one, but only when the
        # user did NOT set 'name' in the config.
        if name_from_config:
            filename = planned_name
        else:
            filename = maybe_get_filename_from_headers(resp, planned_name)
        dest_path = target_dir / filename

        # The header may have changed the name, so repeat the existence check
        # rather than overwrite a file fetched by an earlier run.
        if filename != planned_name and dest_path.exists():
            print(f"[SKIP] File already exists: {dest_path}")
            return

        try:
            total_size = int(resp.headers.get("content-length", 0))
        except ValueError:
            # Malformed header: fall back to a progress bar without a total.
            total_size = 0

        chunk_size = 8192
        part_path = dest_path.with_name(dest_path.name + ".part")
        try:
            with open(part_path, "wb") as f, tqdm(
                total=total_size if total_size > 0 else None,
                unit="B",
                unit_scale=True,
                desc=str(filename),
            ) as progress:
                for chunk in resp.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        progress.update(len(chunk))
            # Publish the file under its final name only once it is complete.
            part_path.replace(dest_path)
        except BaseException:
            # Also covers Ctrl-C: remove the partial file, then propagate.
            part_path.unlink(missing_ok=True)
            raise

    print(f"[OK] Saved to: {dest_path}")
def main():
    """Download every entry listed in the config file.

    Each entry is expected to be a mapping with a ``url`` key and optional
    ``folder`` (sub-directory below ``BASE_DOWNLOAD_DIR``) and ``name`` (file
    name override) keys. Entries without a URL are reported and skipped.
    """
    print(f"[INFO] Loading config from {CONFIG_PATH}")
    entries = load_config(CONFIG_PATH)
    for entry in entries:
        url = entry.get("url")
        # A key written as `folder:` with no value parses to None, which the
        # default argument of .get() does not replace; `or ""` covers it.
        folder = entry.get("folder") or ""
        name = entry.get("name")  # may be None
        if not url:
            print("[WARN] Entry tanpa URL, di-skip:", entry)
            continue
        # str() keeps unquoted numeric folders such as `folder: 2024` working.
        target_dir = BASE_DOWNLOAD_DIR / str(folder)
        download_file(url, target_dir, name_from_config=name)
    print("[DONE] Semua download selesai.")


# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()