Friday, August 29, 2025

Python Script to Download Bergamot Models

import httpx

import tarfile

import os

from urllib.parse import urlparse

 

# https://data.statmt.org/bergamot/models/models.json

# List of model URLs

urls = [

"https://data.statmt.org/bergamot/models/csen/csen.student.base.v1.cd5418ba6a412fc7.tar.gz",

"https://data.statmt.org/bergamot/models/csen/csen.student.tiny11.v1.8f603aded58f0a3c.tar.gz",

"https://data.statmt.org/bergamot/models/csen/encs.student.base.v1.db770d87e491b0dc.tar.gz",

"https://data.statmt.org/bergamot/models/csen/encs.student.tiny11.v1.b5c1ff605296b0e5.tar.gz",

"https://data.statmt.org/bergamot/models/deen/deen.student.base.v2.caa7c0ce3c8eaf05.tar.gz",

"https://data.statmt.org/bergamot/models/deen/deen.student.tiny11.v2.9f70fcb17bf9572d.tar.gz",

"https://data.statmt.org/bergamot/models/deen/ende.student.base.v2.37b172bc9b594f9b.tar.gz",

"https://data.statmt.org/bergamot/models/deen/ende.student.tiny11.v2.93821e13b3c511b5.tar.gz",

"https://data.statmt.org/bergamot/models/esen/esen.student.tiny11.v1.09576f06d0ad805e.tar.gz",

"https://data.statmt.org/bergamot/models/esen/enes.student.tiny11.v1.a7203a8f8e9daea8.tar.gz",

"https://data.statmt.org/bergamot/models/eten/eten.student.tiny11.v1.38de61c668e42f36.tar.gz",

"https://data.statmt.org/bergamot/models/eten/enet.student.tiny11.v1.0b8f835b0c154aaa.tar.gz",

"https://data.statmt.org/bergamot/models/isen/isen.student.base.v2.536d6b8808a5c076.tar.gz",

"https://data.statmt.org/bergamot/models/isen/isen.student.tiny11.v2.829203cf37b7bdc4.tar.gz",

"https://data.statmt.org/bergamot/models/nben/nben.student.tiny11.v1.e410ce34f8337aab.tar.gz",

"https://data.statmt.org/bergamot/models/nnen/nnen.student.tiny11.v1.0efa37c16887eea4.tar.gz",

"https://data.statmt.org/bergamot/models/bgen/bgen.student.tiny11.v1.f9c89a3a25ff8dca.tar.gz",

"https://data.statmt.org/bergamot/models/bgen/enbg.student.tiny11.v1.3ea060c1b76470a7.tar.gz",

"https://data.statmt.org/bergamot/models/plen/plen.student.tiny11.v1.87148203cbda2842.tar.gz",

"https://data.statmt.org/bergamot/models/plen/enpl.student.tiny11.v1.c33219daa12e7872.tar.gz",

"https://data.statmt.org/bergamot/models/fren/fren.student.tiny11.v1.dccea16d03c0a389.tar.gz",

"https://data.statmt.org/bergamot/models/fren/enfr.student.tiny11.v1.805d112122af03d0.tar.gz",

"https://data.statmt.org/bergamot/models/hbseng/hbseng.student.tiny11.v1.fa8a29e01a5332ba.tar.gz",

"https://data.statmt.org/bergamot/models/slen/slen.student.tiny11.v1.d029034e49c3bb08.tar.gz",

"https://data.statmt.org/bergamot/models/mken/mken.student.tiny11.v1.dd03ef56f4695c7b.tar.gz",

"https://data.statmt.org/bergamot/models/mten/mten.student.tiny11.v1.4089a5a036eff1c3.tar.gz",

"https://data.statmt.org/bergamot/models/tren/tren.student.tiny11.v1.d7728d17a313230a.tar.gz",

"https://data.statmt.org/bergamot/models/sqen/sqen.student.tiny11.v1.6ead0c9b236f942b.tar.gz",

"https://data.statmt.org/bergamot/models/caen/caen.student.tiny11.v1.edaf67d1938e80d3.tar.gz",

"https://data.statmt.org/bergamot/models/elen/elen.student.tiny11.v1.0006442831596378.tar.gz",

"https://data.statmt.org/bergamot/models/uken/uken.student.tiny11.v1.108d04d1e160153a.tar.gz"

]


# Download every archive in `urls`, unpack each one into its own folder under
# "models/", then delete the downloaded archive.

_ARCHIVE_SUFFIX = ".tar.gz"


def _download_archive(url, archive_path):
    """Stream *url* to *archive_path*, raising on a non-success HTTP status.

    Streaming keeps memory use flat instead of buffering the whole archive
    in memory, and the status check prevents an HTTP error page from being
    saved and later handed to tarfile as if it were a model archive.
    """
    with httpx.stream("GET", url) as response:
        response.raise_for_status()
        with open(archive_path, "wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)


def _extract_archive(archive_path, dest_dir):
    """Extract the gzip-compressed tar at *archive_path* into *dest_dir*.

    Uses tarfile's "data" extraction filter when the running Python provides
    it. On older interpreters, member names are checked first and
    tarfile.TarError is raised if any would be written outside *dest_dir*
    (this fallback validates member paths only, not link targets).
    """
    with tarfile.open(archive_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=dest_dir, filter="data")
            return

        root = os.path.realpath(dest_dir)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if target != root and not target.startswith(root + os.sep):
                raise tarfile.TarError(
                    f"Unsafe path in archive: {member.name!r}"
                )
        tar.extractall(path=dest_dir)


# Create a folder to store all models
os.makedirs("models", exist_ok=True)

for url in urls:
    filename = os.path.basename(url)
    # Strip the extension only when it is a real suffix, so a ".tar.gz"
    # appearing in the middle of a name is left alone.
    if filename.endswith(_ARCHIVE_SUFFIX):
        folder_name = filename[:-len(_ARCHIVE_SUFFIX)]
    else:
        folder_name = filename
    folder_path = os.path.join("models", folder_name)
    os.makedirs(folder_path, exist_ok=True)
    archive_path = os.path.join(folder_path, filename)

    try:
        print(f"📥 Downloading {filename}...")
        _download_archive(url, archive_path)

        print(f"📦 Extracting to {folder_path}...")
        _extract_archive(archive_path, folder_path)
    finally:
        # Remove the archive on success and on failure alike, so a partial
        # or corrupt download is never left behind next to the model files.
        if os.path.exists(archive_path):
            os.remove(archive_path)

    print(f"✅ Done: {folder_name}\n")

print("🎉 All models downloaded and extracted!")