"""Refresh GeoIP / NREN / university-domain data files in OUT_DIR.

Downloads three data sets and writes each atomically so readers never see a
partial file:

* the MaxMind GeoLite2-ASN database (``GeoLite2-ASN.mmdb``),
* the set of NREN ASNs from PeeringDB (``nren_asns.txt``),
* merged university domains from hs-kompass and the Hipo list
  (``uni_domains.txt`` plus ``uni_domains_meta.json``).

All behaviour is configured through environment variables (see constants).
"""

import json
import os
import shutil
import tarfile
import tempfile
import time
from urllib.parse import urlparse

import requests

# Configuration (environment variables with sensible defaults).
OUT_DIR = os.getenv("OUT_DIR", "/data")
LICENSE_KEY = os.getenv("MAXMIND_LICENSE_KEY", "").strip()
PDB_API_KEY = os.getenv("PDB_API_KEY", "").strip()
PDB_BASE = os.getenv("PDB_BASE", "https://www.peeringdb.com")
INFO_TYPE = os.getenv("PDB_INFO_TYPE", "Educational/Research")
TIMEOUT = int(os.getenv("HTTP_TIMEOUT", "30"))
LIMIT = int(os.getenv("PDB_LIMIT", "250"))
HS_KOMPASS_URL = os.getenv(
    "HS_KOMPASS_URL", "https://hs-kompass.de/kompass/xml/download/hs_liste.txt"
)
UNI_DOMAIN_COUNTRIES = os.getenv("UNI_DOMAIN_COUNTRIES", "DE,AT")


def atomic_replace(src_path: str, dst_path: str) -> None:
    """Copy *src_path* over *dst_path* atomically.

    Copies to ``dst_path + ".tmp"`` first, then renames into place with
    ``os.replace`` so concurrent readers never observe a partial file.
    """
    os.makedirs(os.path.dirname(dst_path), exist_ok=True)
    tmp = dst_path + ".tmp"
    shutil.copyfile(src_path, tmp)
    os.replace(tmp, dst_path)
    os.chmod(dst_path, 0o644)


def download_maxmind_mmdb() -> None:
    """Download GeoLite2-ASN.mmdb from MaxMind and install it into OUT_DIR.

    On HTTP 429 (rate limit) an existing database is kept if present.

    Raises:
        RuntimeError: if no license key is configured, if rate-limited with
            no existing database to fall back on, or if the downloaded
            archive does not contain ``GeoLite2-ASN.mmdb``.
    """
    if not LICENSE_KEY:
        raise RuntimeError("MAXMIND_LICENSE_KEY missing")
    # Official GeoLite2 download mechanism via license_key + edition_id.
    # The key is passed through `params` so requests URL-encodes it instead
    # of it being interpolated raw into the URL string.
    url = "https://download.maxmind.com/app/geoip_download"
    params = {
        "edition_id": "GeoLite2-ASN",
        "license_key": LICENSE_KEY,
        "suffix": "tar.gz",
    }
    with tempfile.TemporaryDirectory() as td:
        tgz = os.path.join(td, "GeoLite2-ASN.tar.gz")
        # Stream the archive to disk so it is never held fully in memory.
        r = requests.get(url, params=params, timeout=TIMEOUT, stream=True)
        if r.status_code == 429:
            existing = os.path.join(OUT_DIR, "GeoLite2-ASN.mmdb")
            if os.path.exists(existing):
                print("[warn] MaxMind rate limited (429); keeping existing mmdb")
                return
            raise RuntimeError("MaxMind rate limited (429) and no existing mmdb")
        r.raise_for_status()
        with open(tgz, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 16):
                f.write(chunk)

        mmdb_found = None
        with tarfile.open(tgz, "r:gz") as tar:
            for member in tar.getmembers():
                if member.name.endswith("GeoLite2-ASN.mmdb"):
                    # Use extractfile() with a fixed output path rather than
                    # tar.extract(): a hostile member name (e.g. "../x") can
                    # otherwise escape the temporary directory.
                    src = tar.extractfile(member)
                    if src is None:
                        continue
                    mmdb_found = os.path.join(td, "GeoLite2-ASN.mmdb")
                    with src, open(mmdb_found, "wb") as out:
                        shutil.copyfileobj(src, out)
                    break
        if not mmdb_found or not os.path.exists(mmdb_found):
            raise RuntimeError("GeoLite2-ASN.mmdb not found in archive")
        atomic_replace(mmdb_found, os.path.join(OUT_DIR, "GeoLite2-ASN.mmdb"))


def pdb_headers() -> dict:
    """Return HTTP headers for PeeringDB; adds auth when an API key is set."""
    if not PDB_API_KEY:
        return {"Accept": "application/json"}
    # PeeringDB API key (optional).
    return {"Accept": "application/json", "Authorization": f"Api-Key {PDB_API_KEY}"}


def fetch_pdb_page(skip: int, info_type: str) -> list:
    """Fetch one page of PeeringDB ``net`` objects filtered by *info_type*.

    Returns the ``data`` list from the JSON response (empty when exhausted).
    Raises ``requests.HTTPError`` on non-2xx responses.
    """
    url = f"{PDB_BASE}/api/net"
    params = {
        "info_type": info_type,
        "limit": LIMIT,
        "skip": skip,
        "fields": "asn,status,info_type",
    }
    r = requests.get(url, params=params, headers=pdb_headers(), timeout=TIMEOUT)
    r.raise_for_status()
    j = r.json()
    return j.get("data", [])


def update_nren_asns() -> str:
    """Write the sorted set of NREN ASNs to ``OUT_DIR/nren_asns.txt``.

    Tries the configured INFO_TYPE first, then known alternate labels used
    by some PeeringDB deployments, stopping at the first label that yields
    any ASNs.

    Returns:
        The info_type label that actually produced results (falls back to
        INFO_TYPE when none did).
    """
    info_types = [INFO_TYPE]
    # Alternate labels seen in PeeringDB deployments.
    if INFO_TYPE != "Research and Education":
        info_types.append("Research and Education")
    if INFO_TYPE != "Educational/Research":
        info_types.append("Educational/Research")

    asns: set[int] = set()
    used_info_type = INFO_TYPE
    for info_type in info_types:
        asns.clear()
        skip = 0
        while True:
            data = fetch_pdb_page(skip, info_type)
            for obj in data:
                if obj.get("status") != "ok":
                    continue
                asn = obj.get("asn")
                if isinstance(asn, int) and asn > 0:
                    asns.add(asn)
            # A short page means we have reached the end of the result set.
            if len(data) < LIMIT:
                break
            skip += LIMIT
            time.sleep(1.1)  # very conservative rate limiting
        if asns:
            used_info_type = info_type
            break

    if not asns:
        print(f"[warn] no ASNs found for info_type(s)={info_types}")

    out_txt = os.path.join(OUT_DIR, "nren_asns.txt")
    # Write to a temp file in the same directory, then rename atomically.
    with tempfile.NamedTemporaryFile("w", delete=False, dir=OUT_DIR) as f:
        for a in sorted(asns):
            f.write(f"{a}\n")
        tmp_path = f.name
    os.replace(tmp_path, out_txt)
    os.chmod(out_txt, 0o644)
    return used_info_type


def update_uni_domains() -> int:
    """Merge university domains from hs-kompass and the Hipo list.

    Writes ``uni_domains.txt`` (sorted, one domain per line) and
    ``uni_domains_meta.json`` to OUT_DIR. Each upstream fetch is
    best-effort: failures are logged and the other source is still used.
    If both sources are empty, the existing file is preserved.

    Returns:
        Number of merged domains written (0 when the write was skipped).
    """
    # Ignore empty entries from a malformed UNI_DOMAIN_COUNTRIES value.
    country_set = {c.strip() for c in UNI_DOMAIN_COUNTRIES.split(",") if c.strip()}

    hs_domains: set[str] = set()
    try:
        r = requests.get(HS_KOMPASS_URL, timeout=TIMEOUT)
        r.raise_for_status()
        for line in r.text.splitlines():
            parts = line.split("\t")
            # Column 20 holds the homepage URL; skip short rows.
            if len(parts) <= 20:
                continue
            homepage = parts[20].strip()
            if not homepage:
                continue
            try:
                if not homepage.startswith(("http://", "https://")):
                    homepage = "http://" + homepage
                parsed = urlparse(homepage)
                hostname = (parsed.hostname or "").lower()
                if hostname.startswith("www."):
                    hostname = hostname[4:]
                if hostname:
                    hs_domains.add(hostname)
            except Exception:
                # Best-effort parse: a single bad row must not abort the feed.
                continue
    except Exception as err:
        print(f"[warn] hs-kompass fetch failed: {err}")

    hipo_domains: set[str] = set()
    try:
        r = requests.get(
            "https://raw.githubusercontent.com/Hipo/university-domains-list/master/world_universities_and_domains.json",
            timeout=TIMEOUT,
        )
        r.raise_for_status()
        for entry in r.json():
            if entry.get("alpha_two_code") in country_set:
                for d in entry.get("domains", []):
                    hipo_domains.add(d.lower().strip())
    except Exception as err:
        print(f"[warn] hipo fetch failed: {err}")

    if not hs_domains or not hipo_domains:
        print(f"[warn] uni_domains: hs_kompass={len(hs_domains)} hipo={len(hipo_domains)}")

    merged = hs_domains | hipo_domains
    if not merged:
        print("[warn] uni_domains update produced 0 entries — skipping write to preserve existing file")
        return 0

    out_txt = os.path.join(OUT_DIR, "uni_domains.txt")
    with tempfile.NamedTemporaryFile("w", delete=False, dir=OUT_DIR) as f:
        for d in sorted(merged):
            f.write(f"{d}\n")
        tmp_path = f.name
    os.replace(tmp_path, out_txt)
    os.chmod(out_txt, 0o644)

    meta = {
        "hs_kompass": len(hs_domains),
        "hipo": len(hipo_domains),
        "total": len(merged),
        "updated_at_unix": int(time.time()),
    }
    meta_path = os.path.join(OUT_DIR, "uni_domains_meta.json")
    with open(meta_path, "w") as f:
        json.dump(meta, f)
    os.chmod(meta_path, 0o644)
    return len(merged)


def write_meta(info_type: str) -> None:
    """Write run metadata (timestamp, info_type, PDB base) to metadata.json."""
    meta = {
        "updated_at_unix": int(time.time()),
        "info_type": info_type,
        "pdb_base": PDB_BASE,
    }
    meta_path = os.path.join(OUT_DIR, "metadata.json")
    with open(meta_path, "w") as f:
        json.dump(meta, f, indent=2)
    os.chmod(meta_path, 0o644)


def main() -> None:
    """Run all three updates and record metadata."""
    os.makedirs(OUT_DIR, exist_ok=True)
    download_maxmind_mmdb()
    used_info_type = update_nren_asns()
    domain_count = update_uni_domains()
    write_meta(used_info_type)
    print(f"[ok] updated mmdb + nren_asns + uni_domains (domain_count={domain_count})")


if __name__ == "__main__":
    main()