Tuesday, March 10, 2026

letsmt.eu - EU Presidency (deprecated)

#!/usr/bin/env python3
#PYTHONIOENCODING='utf-8'
#PYTHONLEGACYWINDOWSSTDIO='utf-8'
# -*- coding: utf-8 -*-

import requests
import sys
# client_id = "u-1ca29e75-8438-4878-ab63-49d31ed5442c"
# response = requests.get('https://www.letsmt.eu/ws/service.svc/json/GetSystemList',
                         # headers={'Content-Type': 'application/json',
                                  # 'client-id': client_id},
                         # json={'appID': 'Tilde|EU Presidency|Web',
                               # 'uiLanguageID': 'en',
                               # 'options': ''})
# try:
    # response.raise_for_status()
# except requests.HTTPError as e:
    # print(e.response.status_code)
    # print(e.response.content)
# systems = response.json()['System']
# for system in systems:
    # print("System for {}-{}: '{}'".format(system['SourceLanguage']['Code'],
                                          # system['TargetLanguage']['Code'],
                                          # system['Title']['Text']))
    # print("ID: {}".format(system['ID']))
    # print()
    
# client_id = "u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0ea"
# client_id = "u-bd13faca-b816-4085-95d5-05373d695ab7"
# client_id = "u-1ca29e75-8438-4878-ab63-49d31ed5442c"   # from the official site

# Translate sys.argv[1] from English to Romanian via the letsmt.eu
# TranslateEx JSON endpoint and print the translation.
client_id = "u-1ca29e75-8438-4878-ab63-49d31ed5442c"
system_id = "smt-d57b1605-598b-46a8-8ad9-4b8e2499b9cf"  # en-ro
text = sys.argv[1]

response = requests.post('https://www.letsmt.eu/ws/service.svc/json/TranslateEx',
                         headers={'Content-Type': 'application/json',
                                  'client-id': client_id},
                         json={'appID': 'Tilde|EU Presidency|Web',
                               'systemID': system_id,
                               'text': text,
                               'options': 'alignment,markSentences'})
try:
    response.raise_for_status()
except requests.HTTPError as e:
    # Report the failure and stop: without a successful response the
    # response.json() call below would raise or yield an error payload
    # (the original code fell through and crashed here).
    print(e.response.status_code)
    print(e.response.content)
    sys.exit(1)

translation = response.json()
# The former .encode('utf-8').decode('utf-8') round-trip was a no-op;
# print the translated text directly.
print(translation['translation'])

import requests
import json
import sys
import urllib.parse

# Translate sys.argv[1] (German -> English) via letsmt.eu TranslateEx.
sourcetext = sys.argv[1]
# URL-encoded form of the input; kept for the commented-out
# TranslateArrayEx GET experiments below — unused by the POST itself.
sourcetexturl = urllib.parse.quote_plus(sourcetext)

# Browser-like headers; 'client-id' authenticates against the letsmt.eu API.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Content-Type': 'application/json; charset=utf-8',
    'client-id': 'u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0ea',
    'DNT': '1',
    'Connection': 'keep-alive',
    'TE': 'Trailers',
}

# Serialize the payload with json.dumps: the previous raw string
# concatenation produced invalid JSON whenever the input contained a
# double quote or backslash (a JSON-injection bug).
data = json.dumps({
    'appID': 'Tilde|EU Presidency|Web',
    'text': sourcetext,
    'systemID': 'smt-160de000-f719-4d5b-9daa-34859345e889',  # de-en
    'options': 'widget=text,alignment,markSentences',
})
# Other known system IDs:
# smt-d57b1605-598b-46a8-8ad9-4b8e2499b9cf en-ro
# smt-99b2f71a-1b3b-418e-bd6b-125f61a53feb en-de
# smt-693519e3-465c-460f-807b-3ad4736ce6b8 ro-en

response = requests.post('https://letsmt.eu/ws/service.svc/json/TranslateEx',
                         headers=headers, data=data)

print(data)
print(response.text)
# NOTE(review): raises ValueError if the service returns a non-JSON error
# page; response.text is printed above first so the failure is debuggable.
translation = response.json()


# curl "https://letsmt.eu/ws/service.svc/json/TranslateEx" -X OPTIONS -H "User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0" -H "Accept: */*" -H "Accept-Language: en-US,en;q=0.5" --compressed -H "Access-Control-Request-Method: POST" -H "Access-Control-Request-Headers: client-id,content-type" -H "Referer: https://translate2018.eu/?lang=en" -H "Origin: https://translate2018.eu" -H "DNT: 1" -H "Connection: keep-alive"
# curl "https://letsmt.eu/ws/service.svc/json/TranslateEx" -H "User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0" -H "Accept: */*" -H "Accept-Language: en-US,en;q=0.5" --compressed -H "Referer: https://translate2018.eu/?lang=en" -H "Content-Type: application/json" -H "client-id: u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0ea" -H "Origin: https://translate2018.eu" -H "DNT: 1" -H "Connection: keep-alive" -H "TE: Trailers" --data "{""appID"":""Tilde|EU Presidency|Web"",""text"":""401 - Unauthorized: Access is denied due to invalid credentials. You do not have permission to view this directory or page using the credentials that you supplied."",""systemID"":""smt-d57b1605-598b-46a8-8ad9-4b8e2499b9cf"",""options"":""widget=text,alignment,markSentences""}"

# https://www.letsmt.eu/ws/service.svc/json/TranslateArrayEx?appID="Tilde|EU Presidency|Web"&systemID=smt-160de000-f719-4d5b-9daa-34859345e889&textArray=[Katze]&client-id=u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0ea

# $publicAppid = 'wikiapp',
    # $apiUrl = 'https://letsmt.eu/ws',
    # $webIframeUrl = 'https://readymt.tilde.com',
    # $currentKey = $publicAppid + '-u-918f738b-7413-405d-acda-577ac8825db2'; // live;
    

#!/usr/bin/env python3
#PYTHONIOENCODING='utf-8'
#PYTHONLEGACYWINDOWSSTDIO='utf-8'
# -*- coding: utf-8 -*-

import requests
import json
import sys
import urllib.parse

# Translate sys.argv[1] (German -> English) via letsmt.eu TranslateEx.
sourcetext = sys.argv[1]
# URL-encoded form of the input; kept for the commented-out
# TranslateArrayEx GET experiments below — unused by the POST itself.
sourcetexturl = urllib.parse.quote_plus(sourcetext)

# Browser-like headers; 'client-id' authenticates against the letsmt.eu API.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Content-Type': 'application/json; charset=utf-8',
    'client-id': 'u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0ea',
    'DNT': '1',
    'Connection': 'keep-alive',
    'TE': 'Trailers',
}

# Serialize the payload with json.dumps: the previous raw string
# concatenation produced invalid JSON whenever the input contained a
# double quote or backslash (a JSON-injection bug).
data = json.dumps({
    'appID': 'Tilde|EU Presidency|Web',
    'text': sourcetext,
    'systemID': 'smt-160de000-f719-4d5b-9daa-34859345e889',  # de-en
    'options': 'widget=text,alignment,markSentences',
})
# Other known system IDs:
# smt-d57b1605-598b-46a8-8ad9-4b8e2499b9cf en-ro
# smt-99b2f71a-1b3b-418e-bd6b-125f61a53feb en-de
# smt-693519e3-465c-460f-807b-3ad4736ce6b8 ro-en

response = requests.post('https://letsmt.eu/ws/service.svc/json/TranslateEx',
                         headers=headers, data=data)

print(data)
print(response.text)
# NOTE(review): raises ValueError if the service returns a non-JSON error
# page; response.text is printed above first so the failure is debuggable.
translation = response.json()


# curl "https://letsmt.eu/ws/service.svc/json/TranslateEx" -X OPTIONS -H "User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0" -H "Accept: */*" -H "Accept-Language: en-US,en;q=0.5" --compressed -H "Access-Control-Request-Method: POST" -H "Access-Control-Request-Headers: client-id,content-type" -H "Referer: https://translate2018.eu/?lang=en" -H "Origin: https://translate2018.eu" -H "DNT: 1" -H "Connection: keep-alive"
# curl "https://letsmt.eu/ws/service.svc/json/TranslateEx" -H "User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0" -H "Accept: */*" -H "Accept-Language: en-US,en;q=0.5" --compressed -H "Referer: https://translate2018.eu/?lang=en" -H "Content-Type: application/json" -H "client-id: u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0ea" -H "Origin: https://translate2018.eu" -H "DNT: 1" -H "Connection: keep-alive" -H "TE: Trailers" --data "{""appID"":""Tilde|EU Presidency|Web"",""text"":""401 - Unauthorized: Access is denied due to invalid credentials. You do not have permission to view this directory or page using the credentials that you supplied."",""systemID"":""smt-d57b1605-598b-46a8-8ad9-4b8e2499b9cf"",""options"":""widget=text,alignment,markSentences""}"

# https://www.letsmt.eu/ws/service.svc/json/TranslateArrayEx?appID="Tilde|EU Presidency|Web"&systemID=smt-160de000-f719-4d5b-9daa-34859345e889&textArray=[Katze]&client-id=u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0ea

# $publicAppid = 'wikiapp',
    # $apiUrl = 'https://letsmt.eu/ws',
    # $webIframeUrl = 'https://readymt.tilde.com',
    # $currentKey = $publicAppid + '-u-918f738b-7413-405d-acda-577ac8825db2'; // live;
    
# NOTE(review): disabled scratch variant of the translation request
# (presidencymt.eu client-id), kept as a bare triple-quoted string so it
# is never executed — the literal is evaluated and discarded at import
# time. Left byte-identical; delete or convert to real code as needed.
'''
import requests

headers = {
    'authority': 'www.letsmt.eu',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'data-type': 'json',
    'client-id': 'u-5d4e301e-cddc-4f21-a350-0c3e5d2bee37',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Mobile Safari/537.36',
    'content-type': 'application/json',
    'accept': '*/*',
    'origin': 'https://www.presidencymt.eu',
    'sec-fetch-site': 'cross-site',
    'sec-fetch-mode': 'cors',
    'sec-fetch-dest': 'empty',
    'referer': 'https://www.presidencymt.eu/',
    'accept-language': 'en-US,en;q=0.9,de;q=0.8,ro;q=0.7',
}

data = '{"appID":"Tilde|EU Presidency|Web","options":"widget=text,alignment,markSentences","systemID":"smt-e-transl-de-ro","text":"Das ist gut"}'

response = requests.post('https://www.letsmt.eu/ws/service.svc/json/TranslateEx', headers=headers, data=data)

print(response.text)
'''



'''
import requests

headers = {
    'authority': 'www.letsmt.eu',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'data-type': 'json',
    'client-id': 'u-5d4e301e-cddc-4f21-a350-0c3e5d2bee37',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Mobile Safari/537.36',
    'content-type': 'application/json',
    'accept': '*/*',
    'origin': 'https://www.presidencymt.eu',
    'sec-fetch-site': 'cross-site',
    'sec-fetch-mode': 'cors',
    'sec-fetch-dest': 'empty',
    'referer': 'https://www.presidencymt.eu/',
    'accept-language': 'en-US,en;q=0.9,de;q=0.8,ro;q=0.7',
}

data = '{"appID":"Tilde|EU Presidency|Web","options":"widget=text,alignment,markSentences","systemID":"smt-e-transl-de-ro","text":"Das ist gut"}'

response = requests.post('https://www.letsmt.eu/ws/service.svc/json/TranslateEx', headers=headers, data=data)

print(response.text)

python3 get-system-list.py u-1ca29e75-8438-4878-ab63-49d31ed5442c

python translate-text.py u-1ca29e75-8438-4878-ab63-49d31ed5442c smt-d57b1605-598b-46a8-8ad9-4b8e2499b9cf "<div>This is an <b>example</b> of a translation request <img src=\"http://letsmt.eu/images/tilde.svg\" /> with formatting tags.</div>"

Client ID

u-1ca29e75-8438-4878-ab63-49d31ed5442c

u-5d4e301e-cddc-4f21-a350-0c3e5d2bee37
Austria 2018 Tilde
u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0ea
Estonia 2017
u-dc4cd3c5-ebc9-4213-ac9d-593c896bc0e
    
System for fr-lt: 'French - Lithuanian (NMT) LT-MT3'
ID: smt-a1d5726f-4356-4e0c-bc04-42dd8d117722

System for fi-sv: 'Finnish - Swedish (NMT)'
ID: smt-c522efc0-2493-4063-9feb-7cd1610276e9

System for lt-ru: 'Seimas - Lt-Ru - NMT'
ID: smt-8d6f52a3-7f5a-4cca-a664-da222afe18b5

System for nl-fr: 'FREME Dutch-French Legal - v1.1'
ID: smt-63f939f0-7ebf-4b45-978a-4fa4714c601b

System for en-sv: 'NLG English-Swedish SMT System'
ID: smt-51ae56f7-cc70-4486-b854-6a85ee57b9a6

System for en-fr: 'EN-FR (Canadian) TRSB Domain System - NMT'
ID: smt-0fd9e73e-2e1d-4be4-b09f-8f304521285d

System for fr-en: '_TMP to get fr-en probability dictionary'
ID: smt-68e08cf1-7b27-4afb-bab4-941f74392eab

System for pl-lt: 'PL-LT -- general -- v3 (incl. 1M MS UI Strings 2012)'
ID: smt-98017a8a-3432-4533-80a5-df035aa8822f

System for en-lv: 'Medicine EN-LV'
ID: smt-f3689d1e-502c-4065-b053-5988d2fb9213

System for en-lv: 'English - Latvian IT (for ORACLE experiment)'
ID: smt-b33794e2-a109-43fd-8075-4d51aa12b771

System for en-pl: '[INTERTEXT] English-Polish Legal & Finance NMT'
ID: smt-ce57328e-070c-4c58-8d26-37f7493fd2e8

System for lv-en: 'MNKC mono data fetch'
ID: smt-9bf820e1-342b-45b4-b1c0-a6b62ce39313

System for en-de: 'English - German Legal V2.0'
ID: smt-c71f6f22-5e2a-4682-a4cc-2d97d38fac5e

System for lv-ru: 'ERAF-MT LV-RU (valsts pārvalde) v0.4'
ID: smt-2120323c-dc13-4b99-92fc-def2aa620228

System for en-lv: 'test en-lv fetch'
ID: smt-cb95eb1e-5a62-41ec-9369-2c5162835634

System for et-en: '[EE EU Presidency 2017] ET-EN - CyberSecurity v2'
ID: smt-3c71368a-c334-44c4-a411-34bd6c3769ee

System for en-lv: 'EESC corpus test. DCEP + EESC'
ID: smt-528fc619-ea0f-4a4b-bd55-8231d3f244ad

System for es-en: 'Spanish - English (NMT) Lynx'
ID: smt-4eafabb9-7cd6-4ae6-9dd6-6b7cc68925bb

System for en-lv: 'English - Latvian Automotive (VOLVO+Jaguar+others)'
ID: smt-6e22d957-a2d7-4c27-97cd-f370eb99b092

System for en-pl: 'GET IT - En-Pl - NMT'
ID: smt-bf087dc6-9230-44e3-aafa-4e541e92732c

System for ru-lt: 'Ru-Lt (General) NMT'
ID: smt-762e0ddf-e034-4922-85eb-edfa2f1cd8e2

System for lt-fr: 'Lithuanian - French (NMT) Legal LT-MT3'
ID: smt-d8313a9e-34d1-440a-a485-196b6ebe9527

System for en-es: '[Printful] English-Spanish NMT System'
ID: smt-09772173-4c66-4718-8bb9-e711cc9b71b5

System for en-lt: 'English - Lithuanian IT (v6.0.2) (Oracle)'
ID: smt-28477162-8e79-487f-80f3-ef8408fe5252

System for sv-fi: 'Swedish - Finnish  (NMT)'
ID: smt-d14cff5a-5463-47f1-80c9-fe22e1bddd95

System for en-lv: 'Tilde Localization. English-Latvian IT '
ID: smt-76c4626c-3234-4ee0-b424-bef0a63dedc7

System for fi-en: 'Finnish - English (NMT) V1 Stockmann'
ID: smt-396b4810-0a06-4644-8e10-f0a8e1d764c0

System for en-et: 'English - Estonian IT with Dynamic Learning Disabled'
ID: smt-73931d5d-bda5-4bea-ba61-4ec3b79794e1

System for en-et: 'English-Estonian Sockeye NMT System (WMT 2018)'
ID: smt-6fcd5014-55fb-4fcb-a508-0de6408ab67c

System for en-lt: 'TB English - Lithuanian (General) v4 - Copy for LT Mono Corpora'
ID: smt-2768d4d6-01fa-4aa8-9fde-db86724288d9

System for de-lv: 'German - Latvian (NMT)'
ID: smt-bbcc3598-1999-4805-8df4-64edd0d80fdb

System for en-lt: 'Oracle'
ID: smt-7301b611-ffc5-4665-b8a8-dbabd7a69ac2

System for pl-en: 'GET IT Pl->En dummy system'
ID: smt-889b2453-0d7b-491e-a020-14baf606cae3

System for en-lv: 'Tilde Corpora Test. Microsoft 2014'
ID: smt-fd862419-05d7-4230-97ee-ff7511c19c0a

System for en-es: 'LYNX EN-ES dummy'
ID: smt-faf4ea3f-6c55-4ec8-8e3b-b1b2e5ca771c

System for lv-ru: 'ERAF-MT LV-RU (vispārējā) v0.4'
ID: smt-04969987-ccf0-4b71-9950-dd397439b39a

System for lv-et: 'Latvian-Estonian baseline'
ID: smt-ddb22d10-ba59-400e-8703-6b3e5908dfbe

System for et-en: 'Estonian - English (General) v2.1 - Copy 2'
ID: smt-1f5cef06-9981-4a41-ad10-f96e46f993b7

System for ru-lv: 'ERAF-MT2 • Mono TIESLIETU jomas korpusi'
ID: smt-ed98ec5a-86e9-45df-a50b-ae8f5f80853e

System for en-lv: 'English - Latvian IT (EN-LV v5.1)'
ID: smt-10562c8e-89ae-45ff-875f-7b5cf57be86b

System for sl-en: 'Slovenian - English Finance GORR (NMT)'
ID: smt-91719894-1ca3-4867-b6c4-4f6aad4d3398

System for en-pl: 'GET IT data fetch-clean'
ID: smt-c5a8d8e1-6556-4b42-b291-77366608c04e

System for en-ru: 'Hugo.lv UI translator (EN-RU)'
ID: smt-6d4fe537-71cc-4434-be44-9afd48567d6d

System for en-pl: 'English - Polish Patents (NMT) v2'
ID: smt-9d2ba06f-4f3c-48bb-aaef-5d93d8fe9624

System for lt-en: 'Lithuanian - English (NMT) Legal LT-MT3'
ID: smt-314ac4fb-a349-45b7-8784-2b6e23601514

System for lt-de: 'LT-MT3 LT-De Data Fetch'
ID: smt-0c8e1ff1-732d-4dab-88ea-95a2e66de98a

System for en-fr: 'EN-FR (Canadian) TRSB Broader Domain System'
ID: smt-bc8e6f7c-c973-4ce1-b223-c5c338b152d1

System for en-lv: 'TB2015 EN-LV v02.2 - more data - data loc'
ID: smt-3dab00d6-709d-479c-bf00-8d4486b4fb35

System for en-lt: 'English - Lithuanian v5.0 (automotive)'
ID: smt-712f6361-7ae2-4508-99a1-8255ac46f66f

System for en-et: 'English - Estonian IT (v5.1)'
ID: smt-ebb12e2e-7b6e-49c8-91f8-8d1bf1878019

System for en-lv: 'English - Latvian IT (EN-LV v5.8) without eval&dev filter'
ID: smt-fefb0423-b090-4b9f-a1e8-72f4de32c9eb

System for en-de: '[allintranslations.com] EN-DE data fetch'
ID: smt-8b98760f-d026-4ffc-b5bd-e53c461dff50

System for nl-de: 'FREME Dutch-German Legal - v1.1'
ID: smt-bb73bc07-7516-4d61-b6ee-cccfe1f8eab9

System for lt-de: 'Lithuanian - German (NMT) v0.2'
ID: smt-4a15e40b-042d-4cbb-bab6-36a8f70f4575

System for lt-en: 'Lithuanian - English IT (v1.1)'
ID: smt-7f5cbf7a-4b5e-4822-b37a-e2b6d62ab9e3

System for de-et: 'Danpower - DE-ET - for Data Fetch'
ID: smt-ee0b7844-28c9-46d6-b623-863a978353de

System for lv-ru: 'LV-RU system for corpora fetch to build data for Probability dictionaries'
ID: smt-9b2059d6-8970-44d7-a302-7f7c5d3a161a

System for en-lv: 'EESC corpus test baseline. JRC Acquis + DCEP + DGT + Europarl only'
ID: smt-2ce7a40b-7429-449b-99bb-5d6ce3d53a28

System for en-sv: 'NLG English-Swedish Adapted NMT System'
ID: smt-5debdeb0-8bd9-41b7-a3d1-b1a64b2f17e1

System for en-lv: 'TB English - Latvian (v03)'
ID: smt-e3080087-866f-498b-977d-63ea391ba61e

System for en-lv: 'English - Latvian IT (EN-LV v5.8)'
ID: smt-4a3835e0-cd63-46bc-8a44-65493e1b69a1

System for en-lv: 'English - Latvian IT (v4)'
ID: smt-89ce23df-462c-4910-a5fe-5b5de144caad

System for en-lt: 'EN-LT  (General) NMT'
ID: smt-479476a6-64a0-4eba-ba1a-2918e2296d9e

System for en-lv: 'Tilde Localization. English-Latvian IT - Part 2 (Microsoft corpora only)'
ID: smt-4123bf84-e5e2-477c-a0ad-2e62edc88f4a

System for en-lv: 'IT'
ID: smt-d39ab8f5-43ff-44f7-a03a-3b0e6a843bc4

System for lt-pl: 'LT-PL -- general -- v3 (incl 1M MS UI Strings 2012)'
ID: smt-7435d639-cf9f-44b3-9796-90b8d842818a

System for de-en: 'DE-EN Legal System Baseline - With Dynamic Learning'
ID: smt-fe2c95a4-c387-47c5-bf4f-fb446d48f4aa

System for de-en: 'mono-translated.en-de - corpus clean'
ID: smt-27303ac3-83fc-4161-8471-97546c001961

System for en-lv: 'EESC corpus test. DGT + EESC'
ID: smt-22ef75c1-b4bc-4127-916c-dd44d099e09d

System for lt-en: 'EN IT Mono corpora fetch (LT-MT3, to build EN IT Language Model)  - Copy'
ID: smt-8d36322a-12db-4341-9c72-01fb1d7dad35

System for et-en: '[EE EU Presidency 2017] ET-EN - CyberSecurity'
ID: smt-9a579b70-b5e2-4a22-a0dc-43eef3fbd78b

System for nl-en: '[NL-EN] Test'
ID: smt-36a40806-cbb7-4f87-a8b8-712307e1be74

System for en-lv: 'MNKC EN-LV SMT'
ID: smt-32e567f7-6f39-4297-a00c-0434dea12807

System for lv-en: 'ERAF-MT2 • Mono TIESLIETU jomas korpusi'
ID: smt-fc1e62d2-257b-4a4a-ae23-865643144d11

System for en-et: 'IT'
ID: smt-73de35cc-6bee-494a-a681-15c87299956c

System for en-da: 'English - Danish -- data fetch -- for dsb.dk'
ID: smt-59b723c1-3283-4da6-a245-bca5c14096b2

System for de-en: 'DE-EN Adapted NMT System for Kothes'
ID: smt-acccba78-db2b-44d5-9478-c42ffb888e4e

System for lt-en: 'IADAATPA LT-EN'
ID: smt-24a5eff7-5784-4a60-900c-2f90b093e424

System for de-sk: 'German - Slovak (NMT) Aspena'
ID: smt-968eb397-281c-415f-a2c3-cb3d6f6e1469

System for en-lv: 'TB2016 EN-LV v0.2 (more data)'
ID: smt-7c5e5db9-650b-48d6-b148-fefb2fddb599

System for ru-et: 'General domain system'
ID: smt-778032a7-4783-4d77-9cfb-b17a2bebb3af

System for pl-en: 'Polish - English Clinical trials (NMT) Kontekst'
ID: smt-1f04c12a-50de-470b-80ee-0a21fa2ab244

System for en-da: 'English - Danish (NMT) AGA'
ID: smt-a3152f7c-8256-4dea-91a5-b93ecf8e491e

System for en-lv: 'Angļu - Latviešu (Mežu nozares)'
ID: smt-54e11922-eee7-4f43-9120-bc965852885b

System for en-et: 'IT system'
ID: smt-27527309-10d6-4c7d-885d-3843dd4b5081

System for en-lt: 'English - Lithuanian IT (v4)'
ID: smt-8ce0396a-b4fb-4a2f-9e0d-ca8dd129e77e

System for da-en: 'Yet another DA-EN baseline'
ID: smt-1c30eee6-1d60-481f-8d3e-6aa3d25d1d33

System for lt-pl: 'Lithuanian - Polish (NMT) LT-MT3'
ID: smt-fc794ab3-c8af-4e29-b460-4d93adb74ce5

System for de-en: 'Austrian presidency - De-En - non-tuned NMT'
ID: smt-cf4845a7-77dc-4e9b-ab19-8452c282e55b

System for de-en: 'DE-EN Legal System Baseline'
ID: smt-8a8d0a06-c679-409e-9c24-6ab02223e4eb

System for en-lt: 'English - Lithuanian IT (Philips)'
ID: smt-cde5d5be-e6b6-42d0-999b-c2a25450e3b9

System for en-lt: 'English - Lithuanian v5.1 (automotive)'
ID: smt-89522e28-8a63-40a7-bbb0-541b46839f4c

System for de-lv: 'German-English Test'
ID: smt-8b9f24c5-7fae-4e24-be5c-0be23b2385ee

System for en-lt: 'Tilde Localization. English-Lithuanian IT '
ID: smt-2c6984d7-a2b6-42b1-a977-9c44afc33650

System for en-lt: 'English - Lithuanian (General) v5'
ID: smt-b69e08e1-33cd-48f5-96ec-8f589b40ffd6

System for lt-ru: 'Lithuanian - Russian (NMT) LT-MT3'
ID: smt-0d55ef71-81ef-40fc-8192-c43776ff1145

System for lt-de: 'Lithuanian - German (NMT) LT-MT3'
ID: smt-2803fa35-6c0d-45a7-ac18-fb9618d7cac3

System for ro-en: 'Romanian - English (NMT) Presidency'
ID: smt-693519e3-465c-460f-807b-3ad4736ce6b8

System for en-lt: 'LT Seimas korpusi pietrenēšanai - data fetch'
ID: smt-e1de6ffe-63b2-465a-8c17-6a3c9ec10223

System for en-lv: 'English - Latvian IT (EN-LV v5.8) without eval&dev filter [Dynamic]'
ID: smt-99d3a40a-6804-414d-bfdf-39996465a812

System for et-ru: 'Estonian - Russian (General) v0.1'
ID: smt-d8131189-a7e7-47bb-b31e-f2917251f774

System for en-et: 'EU Presidency NMT system (EN-ET)'
ID: smt-f313a5e6-f532-47f4-aa8c-5a963e933a0b

System for en-ja: 'English - Japanese V3 (NMT) TLS Translations'
ID: smt-009d007d-a2ca-4932-968a-8164e0c8380b

System for en-es: '[Chess] English-Spanish NMT System'
ID: smt-1fe03594-30b3-4316-b472-e60fb4298834

System for en-pl: 'FACTSET EN-PL'
ID: smt-c8e3aa16-3c2a-4d5c-a805-917dc79dd021

System for en-lv: 'ERAF-MT2 • Mono LV KULTŪRAS jomas korpusi'
ID: smt-2ba3abd8-9fb8-4e55-a5e0-be25fc0faa21

System for en-sl: 'Amidas En-Sl - Only In-domain data'
ID: smt-29b1365a-db28-43d0-9065-94c89a221a71

System for lt-pl: '[LT-MT3] Systems to build data for Probability dictionaries'
ID: smt-87f3d811-8ff3-4f1a-ac27-91e0b651bcc1

System for en-lv: 'ODINE - EMA test. EMA only'
ID: smt-124914f5-bb6f-4ba9-a098-ebac410f00e7

System for lv-en: 'TB2016 LV-EN v01'
ID: smt-08ff5e72-e016-4763-9dfc-7ad5362945cd

System for fr-lv: 'FR-LV Lettonie - Francija'
ID: smt-6020be88-49b0-4260-ad9d-4b50ef4e564e

System for en-et: 'Celsius EN-ET-NMT'
ID: smt-fff0096b-9a37-4b67-baa2-2f8317ab392e

System for ru-et: 'Russian - Estonian (General) v0.1'
ID: smt-1a98cbd0-8e28-4388-a916-05d5883410a4

System for en-lv: 'English - Latvian IT (EN-LV v5.8) with eval&dev filter'
ID: smt-1a589c0d-b56f-4e29-a8c8-f0bc695d0620

System for lv-en: 'Rail Baltica - Lv-En - NMT'
ID: smt-2ac18dd7-044b-46f3-adf1-97eefa521c9c

System for en-fr: 'Open Data demo system (EN-FR)'
ID: smt-cfd2a84b-ddb3-4431-a5bb-dfa578544974

System for en-sl: 'Amidas - En-Sl - NMT'
ID: smt-0da5d07f-c557-42fb-8f26-b0473b38fe2f

System for en-lt: 'English - Lithuanian (NMT) Legal LT-MT3'
ID: smt-c7d62941-60d6-41a6-aa2f-16d2d8d815d4

System for en-lv: 'ERAF-MT EN-LV (vispārējas jomas) v0.4'
ID: smt-1c08a5bb-95e8-4806-9a7f-3a9ad2114eca

System for en-pl: 'Locworld En-Pl - NMT'
ID: smt-9979a8e9-2428-49ed-b627-c95337533ab2

System for de-en: 'Austrian presidency back-translated De to mono En corpus-clean'
ID: smt-0a61bedc-89f1-4c8b-8463-bab6dc73d6f1

System for en-lv: 'EESC corpus test. Europarl + EESC'
ID: smt-f5fe7a74-4d1b-4b8f-86a1-e536cf63938d

System for en-lv: 'TB2016 EN-LV v0.3'
ID: smt-01214522-5f46-440f-984d-c8ad3bb01baa

System for et-en: '[EE EU Presidency 2017] ET-EN - General Domain v2'
ID: smt-7cce8647-8aa0-40d8-b1b6-77295c0b23bb

System for en-lt: 'WMT19 EN-LT SMT Baseline '
ID: smt-a08583ae-454c-4d76-80f9-f86b0c5351e2

System for lv-en: 'TB Latvian - English NMT'
ID: smt-46b9633d-48d9-4dd7-b096-2516c383a715

System for lv-en: 'LV-EN data from likumi.lv '
ID: smt-5d9f1d2c-f56b-43da-95ab-db26e4f876ab

System for lv-en: 'Latviešu - Angļu (Vispārēja)'
ID: smt-06bd7f86-792f-4d47-a260-b3857439fc1e

System for en-lv: 'EN-LV-Marian-NMT - Updated Files'
ID: smt-16d2a887-317f-4ef4-976b-90bd8c5e1a46

System for en-lv: 'English - Latvian IT (EN-LV v5.8) with eval&dev filter [Dynamic]'
ID: smt-d3c6b4f8-b850-4970-83a7-cad8415a6d31

System for en-lt: '[LT-MT3] Systems to build data for Probability dictionaries'
ID: smt-85320d0e-071c-4845-8859-e99563363837

System for en-ja: 'English-Japanese New data fetch'
ID: smt-f278417e-3f4f-4c3d-9d0b-61f600c911f5

System for lv-ru: 'LV-RU Test'
ID: smt-63e7d12b-fce0-4091-baa2-5aa524cc9e00

System for en-lv: 'EN-LV for Corpus Fetch - Copy'
ID: smt-00682b26-7fa3-483f-a083-6880bf4cf419

System for en-fr: 'EN-FR (Canadian) TRSB Domain System'
ID: smt-db5e8221-b283-4212-8879-06124ce0cc3b

System for en-et: 'TB English - Estonian NMT'
ID: smt-9b3e3178-f0b3-41db-960d-ed2ce09904e4

System for et-fi: 'Estonian - Finnish (NMT) v2'
ID: smt-c45139d8-f1b1-46c2-9b21-c84c51285b03

System for en-et: 'WMT 2018 EN-ET Corpus fetch'
ID: smt-eb980931-8ebd-49b6-abc5-a9a765b3e1c3

System for fi-en: 'Finnish - English (NMT) Presidency'
ID: smt-48b29c38-c6c9-49c3-ab2e-2bfc1c0d1b73

System for pl-en: 'FACTSET PL-EN'
ID: smt-f311f0ac-ee44-49e0-9116-252db1e4a0c7

System for de-sk: 'ASPENA - KAUFLAND - Food [data fetch] - Copy'
ID: smt-ca7533a9-e20d-4cd8-99f8-3db94ff03e0e

System for en-lt: 'Tilde Localization. English-Lithuanian IT - Part 2 (Microsoft corpora only)'
ID: smt-db2da569-c3e6-404b-9dfb-f895d8d0d991

System for en-ro: 'ro-en-test - Copy'
ID: smt-e63d1422-dc59-450f-93ac-0fde3520ac3a

System for en-lt: 'English - Lithuanian IT (v6.0) (Oracle) – Decode XML entities' OFF'
ID: smt-8fcd5f95-1c4d-4796-98b0-d6ebdec3a47e

System for lt-ru: 'TB Lithuanian - Russian (General) v4'
ID: smt-84c2eb73-da09-48ce-8cb9-71b61f37483c

System for de-sk: 'ASPENA - KAUFLAND - Food [data fetch]'
ID: smt-7ae1e77c-e879-4ecd-94ea-1c2338e1700d

System for en-de: 'Austrian presidency back-translated En to mono De corpus-clean'
ID: smt-1967e392-5c07-44c2-9abe-cbd7f1fcdea4

System for en-et: 'IT system'
ID: smt-f401997e-a928-4f47-b926-2cb0e31491f5

System for en-lv: 'Angļu - Latviešu (Vispārēja) - Sockeye Transformer'
ID: smt-f19dd79f-9399-4dd0-bf36-f5de9c8b21c4

System for en-mt: '[EN-MT] Data Fetch & Phrase Tables Build'
ID: smt-fcc2dc3c-4330-457a-a3c2-69a6e2e7d0b0

System for lt-de: 'Dummy for Lt-De'
ID: smt-edd70df9-88aa-451f-ab54-e61116f0b43f

System for lt-fr: 'Lithuanian - French (NMT) LT-MT3'
ID: smt-9696b5ba-e3a4-420c-b740-916051c5c4e7

System for en-et: 'English - Estonian IT with Dynamic Learning Disabled - Moses 13.10'
ID: smt-94d82eeb-53af-4b6d-8699-f0fc88465c22

System for lv-en: 'Latvian - English (NMT) Linearis 2'
ID: smt-06dfb135-62b2-4380-bae2-1c6f2663e32b

System for en-fr: 'EN-FR skelets TRSB'
ID: smt-2bb8ca98-b9a5-4086-8635-e7d113f087be

System for sl-en: 'GORR Finance SL-EN data fetch'
ID: smt-731ea48e-869c-4b29-b785-25a5da260e7c

System for ru-lt: 'TB Russian - Lithuanian (General) v4'
ID: smt-b05093cb-7395-4fb9-bd15-5fff77d78413

System for en-lv: 'ERAF-MT2 • Mono LV GENERAL (ziņu) jomas korpusi'
ID: smt-556825d1-0ac9-4589-9305-dad3f592da74

System for en-et: 'English - Estonian IT (v5.5)'
ID: smt-789b5215-4e11-49ab-a483-827c18b7f85c

System for en-de: 'En-De for Corpus Fetch'
ID: smt-7e965f8e-3bfc-4c1b-bfee-5858ce0afc01

System for lt-en: 'TB Lithuanian - English (General) v4'
ID: smt-b459424c-0135-4835-b483-39d6bdc49bf8

System for en-lt: 'LT Corpora Inventory'
ID: smt-a78a814c-38db-46e8-a2b9-e155f8a33ed5

System for en-zh: 'AP English-Chinese V6 (Traditional HK)'
ID: smt-55b87498-4932-406a-a4c2-ee0edf61d78b

System for lv-ru: 'Lv-Ru (General) NMT'
ID: smt-7bbc8ee2-1f91-409a-8fea-b470d4f3af4d

System for lt-en: 'WMT19 LT-EN SMT Baseline'
ID: smt-b888ccf9-b5c5-4188-900a-cb9fc57fd89b

System for sv-fi: 'Swedish - Finnish data fetch'
ID: smt-42b154c9-1989-433c-8ee8-a5347796ba04

System for de-nb: 'German-Norwegian MT -- do we have data for this?'
ID: smt-041e1cb5-5409-4ca1-a2a3-d544bd8ff353

System for es-en: 'Spanish - English (Doppler Labs) - v1.2'
ID: smt-dd630d3f-7aff-4610-b8b6-f01cd3b2c49c

System for en-et: 'TB English - Estonian (General) v2.2'
ID: smt-1a0ec013-48c1-4f39-a0b7-382cb27ca387

System for en-et: 'English - Estonian IT (v5.4)'
ID: smt-9d3fa2b2-dd0d-493f-b9f5-f4873917ea83

System for en-et: 'English - Estonian IT (v5.5.1)'
ID: smt-e38939b0-dedd-4091-865d-da204cbe5897

System for en-lv: 'English - Latvian VOLVO'
ID: smt-573a52cb-8728-455a-b05c-ac73865b053f

System for en-lv: 'English - Latvian IT (EN-LV v5.8) without eval&dev'
ID: smt-4bfa5c33-640a-4148-a707-3b3f373e1a87

System for ja-en: 'Japanese - English V3 (NMT) TLS Translations'
ID: smt-e3c86941-564d-47e0-8a33-5fb4504def52

System for sv-en: 'Swedish - English NMT CircleK new'
ID: smt-abf49352-0264-4480-bcfb-a7c7fabd2b0f

System for pl-en: 'Diuna General [data fetch]'
ID: smt-a5b64a69-fc93-464b-b39f-c981492eaf45

System for en-fi: 'Finnish Presidency Data Fetch'
ID: smt-8984b580-6ad5-43b8-a26d-03ff9c34a84c

System for en-pl: 'EVAL EN-PL - General Eval'
ID: smt-aebde704-ef7d-4b92-b402-33dd902dba5d

System for pl-lt: 'PL-LT - dummy'
ID: smt-8960939a-3bee-43c9-94cc-a26960acdd69

System for en-ja: 'English-Japanese SMT'
ID: smt-df1afba0-5e90-41d6-a6f0-f654b2be261d

System for en-lv: 'English - Latvian (NMT) Linearis 2'
ID: smt-a6f1cbc5-1d9f-4c4d-8d4d-6b0699446112

System for en-is: 'en-is dictionary'
ID: smt-5384173b-5fc5-45a1-a105-f0b5a322797c

System for en-lv: 'EN-LV SMT Demo System'
ID: smt-8b6f6a0e-552b-4dce-9724-ff83ac221b0f

System for en-ja: 'English-Japanese full new data fetch'
ID: smt-1ace62d7-8b65-4a6e-9688-8fd4992f856f

System for es-en: 'LYNX ES-EN Legal Data Fetch'
ID: smt-552534dd-9f7a-4cc1-b786-860abd435450

System for lt-en: 'LT-EN SMT Baseline System for LT-MT3'
ID: smt-88b7ea88-f540-4318-ad17-1a27ece043bd

System for en-lv: 'ODINE - EMA test. Tilde EMEA 2014 only'
ID: smt-63ebdc9c-c87d-458f-addb-84372d571b14

System for lv-en: 'ERAF-MT LV-EN (vispārējas jomas) v0.4'
ID: smt-5abbb6ca-f956-44df-823f-9c32848bc806

System for en-ar: 'AP English-Arabic v2.1'
ID: smt-f630c1c4-3267-49d1-983c-f42397620aaf

System for lv-en: '[LMI] LV-EN data fetch for tuning'
ID: smt-db0d1550-f559-448d-9d64-189dc5acae93

System for ru-lv: 'Ru-Lv (General) NMT'
ID: smt-95501b3b-1b31-4d90-b115-c3543f9149cc

System for en-pl: 'EN-PL General SMT'
ID: smt-cc09723e-1fb6-421e-8bb6-581c57f041bb

System for en-pl: '[BIRETA] EN-PL NMT System'
ID: smt-71fdb154-11d9-49e8-b461-a22efbc2fe67

System for et-en: 'eesti - inglise EU Presidency NMT system'
ID: smt-85a613e5-5b6f-473a-84a4-d3fdfb0d187e

System for en-et: '[EE EU Presidency 2017] EN-ET - CyberSecurity v2'
ID: smt-ff2d7d25-dc65-4eaa-8129-0f67a7d5f547

System for et-ru: 'bEstMT ET-RU - November 24, 2017 - NMT MLSTM'
ID: smt-4a9a21c4-0d0d-49f1-a0b1-e4a4b7f6ab4b

System for de-lt: 'Danpower - DE-LT - for Data Fetch'
ID: smt-1798a2b3-b4f1-495d-86b5-2ef0fbdb53af

System for lt-en: 'Lithuanian - English IT (v1)'
ID: smt-c87a0f5c-6761-4f52-836f-ae8cc942eab2

System for de-en: 'Austrian presidency mono data repaired newlines - English'
ID: smt-2c74acbf-c936-4fed-80fd-86c07e1c381d

System for en-lt: 'English - Lithuanian (General) v5.1'
ID: smt-e47b1bce-e32c-41f9-81e6-f4dd9ef04dcc

System for en-pl: 'English - Polish INTERTEXT Medical (NMT)'
ID: smt-856eccfb-9b6d-4f2a-a5fc-9ef6ca4dc897

System for lt-en: 'LT-MT3 LT-EN IT Comparable SMT System'
ID: smt-39cf557b-b8ff-4925-a7b8-bd7d6083a591

System for en-pt: '_SeproTec: English-Portuguese'
ID: smt-794c6133-ce99-4fb6-be49-1d74ba809560

System for nl-en: 'LYNX NL-EN Energy Data Fetch'
ID: smt-1f78b65a-8d9d-49d8-8cff-3f13b4e75540

System for lt-en: 'Lithuanian - English (NMT) Seimas v2'
ID: smt-c9fb0b01-4414-4404-9698-80c5d424f0c6

System for lv-ru: 'TB LV-RU v2.0'
ID: smt-1094dff0-c98e-47f3-8a04-17056da39850

System for fr-lt: 'FR-LT SMT Baseline System for LT-MT3'
ID: smt-667c7342-6eaf-42d5-8320-41520bd9dce9

System for en-lv: 'EESC corpus test baseline. EESC only'
ID: smt-6c9098ac-dd42-41f1-9fe3-a6df9a761528

System for lt-en: 'Lithuanian - English (NMT) WMT 2019'
ID: smt-29ca7818-628c-4bc6-a67a-18b9dd3d4cf9

System for lv-en: 'LV-EN Legal Baseline (NMT)'
ID: smt-6a218329-b9e8-42e8-a9da-f6d30da81619

System for en-fi: 'English - Finnish (NMT) Presidency'
ID: smt-e081b525-3a5e-4e4c-9f93-de46a2c04fa4

System for en-lt: 'EN-LT  (General) NMT - for testing only'
ID: smt-2b29ab02-1782-4913-af99-563eb1ff73dc

System for de-lv: 'German - Latvian (NMT) v2'
ID: smt-f53f243f-bbe5-4bd2-b217-79d08595d40d

System for pl-lt: 'Polish - Lithuanian (NMT) LT-MT3'
ID: smt-3a22d36d-26d5-40b5-bec8-f24bfdd6e4d1

System for en-lv: 'EESC corpus test baseline. JRC Acquis only'
ID: smt-bdc8d1f4-4fba-4d28-97bb-ea9a3ca003f5

System for en-lv: 'TB2015 EN-LV v02.2 - more data'
ID: smt-1faaa188-3781-4e50-a678-cfec86ba5b5c

System for fi-sv: 'FI-SV dummy '
ID: smt-cb64a2f8-f605-439c-a023-86ddf9799845

System for de-lv: 'DE-LV data fetch '
ID: smt-30864045-8536-4b3b-8428-64181b569e3e

System for en-da: 'EN-DA AGA MT Pilot - Dummy '
ID: smt-56c6b9ed-eedd-40a4-bdb4-feb428a180ad

System for lt-en: 'LT-EN (General) NMT - for testing only'
ID: smt-0c5e395d-aea0-403d-bea4-373bed908804

System for en-lv: 'English - Latvian IT (EN-LV v5.7)'
ID: smt-050450e3-e8bd-4c6a-a15f-000c63a0d1b1

System for fr-da: 'TextMinded fr-da - v1'
ID: smt-9ab3ce95-b37e-441a-b5f5-57d902b88e6a

System for lv-ru: 'ERAF-MT2 • Mono RU KULTŪRAS jomas korpusi'
ID: smt-43089f56-cbfa-484c-a43d-e5563800bc85

System for en-de: 'Austrian presidency mono data repaired newlines - German'
ID: smt-913ab605-1d0a-491f-90c6-c244fc1ca72a

System for de-en: 'Austrian presidency mono data Only good newlines - English'
ID: smt-f82c73ac-e587-41a4-a542-67e7754b1aa1

System for da-en: 'DA-EN AGA MT Pilot - Dummy'
ID: smt-5f0140f3-ecc9-4956-a01e-5c9a0d7066ce

System for fr-lt: '[LT-MT3] Systems to build data for Probability dictionaries  - Factorize'
ID: smt-d04e50a5-8565-4bd5-b365-adab890f5587

System for en-es: 'Codex Global - English-Spanish SMT'
ID: smt-dd249f8c-e27c-46cf-80d4-87034d3b680b

System for en-lt: '[WIP] English - Lithuanian IT (v6) DYNAMIC'
ID: smt-ffe84252-53f9-4113-b454-cab725b1b1d6

System for en-zh: 'AP English-Chinese V4 (Traditional HK)'
ID: smt-312c4042-8f46-4247-bdd8-3dcd278e678c

System for en-lv: 'Test 1'
ID: smt-2ccb025c-e94d-47b1-b16a-08a0b3704fb0

System for de-fr: '[Hieronymus] DE-FR data fetch'
ID: smt-f4c844eb-bf54-438a-b778-ffc01d22350a

System for en-ru: 'English - Russian - WMT 2014'
ID: smt-c9aa2618-b830-4faa-ae3e-6c2ce80ff312

System for lv-en: 'Medicine LV-EN'
ID: smt-5ef15791-ab78-476e-ae50-b6714f46e096

System for en-lt: 'LT-MT3 EN-LT legal domain fetch '
ID: smt-448603ad-5cea-4dbe-ba28-44ac77ba5d38

System for en-sv: 'NLG English-Swedish NMT System'
ID: smt-01dff081-808e-4eca-aa92-b8c395d570b3

System for en-lv: 'TB2016 EN-LV v0.1 (more data)'
ID: smt-362b463d-2191-4349-a10f-7f9403709515

System for en-de: 'Austrian presidency - En-De - NMT'
ID: smt-0bd67c10-8317-45ac-9c4a-841844f2af7e

System for en-lt: 'English - Lithuanian IT (v4) - For QE Model Training - Trial 2'
ID: smt-f426c59c-f72d-4d6a-ba4e-1b0c57f3ba2d

System for en-lv: '[OLD] English - Latvian IT (EN-LV v5.3)'
ID: smt-748a8ffe-8482-402b-9c19-c75b642e10de

System for et-de: 'Dummy for Et-De'
ID: smt-b3f4f9b1-0e61-465d-995a-495d75ad877c

System for en-lv: 'English - Latvian IT (for ORACLE experiment 3)'
ID: smt-de588210-d084-409a-ac1f-9dd2c3613ee5

System for en-et: 'English - Estonian IT - Microsoft (v5.0)'
ID: smt-ecb67be9-6abc-49f6-9864-304adc965433

System for en-lv: 'Angļu - Latviešu (Vispārēja) - Test System (MLSTM)'
ID: smt-6ebbc31f-d814-4087-8589-10b8292159a2

System for de-fr: 'German - French (NMT Ensemble) Hieronymus'
ID: smt-8fe555d8-305e-40b4-873a-abf9c82daff9

System for en-lv: 'TB English - Latvian NMT'
ID: smt-5eed677a-31c8-4a2e-bddc-f90c3e6ad96e

System for en-lv: 'EESC corpus test. JRC Acquis + DCEP + DGT + Europarl + EESC'
ID: smt-a04fb95e-ad88-453d-963a-cae5d985c2f6

System for en-pl: '[INTERTEXT] data fetch 1'
ID: smt-f4271fc3-adf0-474d-8ec3-583ad7ba2654

System for de-en: 'DE-EN General System for Kothes'
ID: smt-af3c3f6f-1d1f-4250-a651-90708cb63aa6

System for en-lt: 'English - Lithuanian IT (v6.0) (Oracle) – Decode XML entities' ON'
ID: smt-b4ecf41e-75ee-4d22-9161-5cc28bf51d67

System for en-ja: 'English - Japanese v2 (NMT)'
ID: smt-b8369b5f-61f5-40d4-83d6-cc41cdf43212

System for de-en: 'De-En for Corpus Fetch'
ID: smt-c2931afb-fa4c-4dde-8746-7764323fe27f

System for en-lv: 'EESC corpus test baseline. DGT'
ID: smt-06f67a55-a867-4c8c-a09d-259238898e2a

System for et-en: 'Estonian-English tests on LetsMT5. v1 - baseline'
ID: smt-33ff38b8-2cfc-4c81-b816-6a33cd908116

System for en-lv: 'Linearis En-Lv - NMT'
ID: smt-2495e05c-7593-4508-852e-80bac4e5bbdc

System for en-bg: 'EU Presidency English-Bulgarian NMT (adapted)'
ID: smt-2f65da7c-6b4f-4c29-bd47-ada26111c861

System for en-lv: 'English - Latvian IT (EN-LV v5.0)'
ID: smt-1063a6f4-3a24-4fb2-b2fd-57abce86d5fb

System for en-sl: 'English - Slovenian (NMT) - GORR'
ID: smt-ba509496-174a-4926-b400-62d54f8a79d5

System for en-pl: 'EVAL EN-PL - GetIT - General eval'
ID: smt-192ab252-8bd1-40af-88e5-9bfd65c3bc62

System for en-lt: 'Seimas - En-Lt - data fetch'
ID: smt-829145b1-0e50-4458-8468-bb7c54c8f047

System for et-de: 'Estonian - German (NMT) v0.2'
ID: smt-75c66b39-691e-4d60-a075-5004401e5a24

System for lv-en: 'LV-EN Legal Baseline'
ID: smt-2bc36025-c46d-4a6a-844a-437daf89d40c

System for lt-en: 'Seimas - Lt-En - NMT'
ID: smt-4814ebbd-b354-4a9c-83c8-8dbf64f60d44

System for en-de: 'En-De  [non-tuned NMT]'
ID: smt-99b2f71a-1b3b-418e-bd6b-125f61a53feb

System for en-lv: 'English - Latvian IT (EN-LV v4.2)'
ID: smt-c2adb90f-ee32-442a-b81a-3666b6124dc6

System for fr-lt: '[LT-MT3] Systems to build data for Probability dictionaries '
ID: smt-26d26f4e-78a9-4ef5-9d17-3153a4683dc0

System for en-lv: 'ODINE - EMA test. EMA UNIQUE only'
ID: smt-a959f31b-b9f5-4749-8157-4cf0a074b4f4

System for en-et: 'English - Estonian. Medicine domain'
ID: smt-afdafea3-e2d0-4574-b091-42e4f837ae19

System for fi-et: 'Finnish - Estonian (NMT) v2'
ID: smt-8a5bbd0f-96c4-42c3-869f-db15abe406ca

System for lt-en: 'WMT19 LT-EN mono corpus fetch'
ID: smt-c8e4778a-13a1-4977-8085-3eebc30a103c

System for en-sv: 'English - Swedish NMT CircleK new'
ID: smt-f2e35605-1d5e-4d69-8664-d27a71a1ac26

System for et-lv: 'Estonian-Latvian baseline'
ID: smt-30eab85e-8ac2-44b4-bcd6-a985f696bec8

System for en-lv: 'ERAF-MT EN-LV (valsts pārvalde) v0.4'
ID: smt-7108bcc9-0646-4b4a-87ac-d240977b9380

System for et-en: 'TB Estonian - English (General) v2.1'
ID: smt-f9b2dea6-628f-440b-b4a6-7e25f08f2470

System for en-lt: 'English - Lithuanian IT (v6) (Oracle)'
ID: smt-960179d7-604c-41d1-b8a0-01c5b78b2f39

System for en-ja: 'English - Japanese (NMT)'
ID: smt-db5ae0b8-a8a7-4af6-b44f-6a840dc57b73

System for pl-en: 'Polish - English Pharmacy (NMT) Kontekst'
ID: smt-5a4c0518-826b-443d-9dd6-b8ca2de5fd49

System for sv-en: 'Swedish - English (NMT) CircleK'
ID: smt-11379275-2632-4928-9388-6ae7dd1e012d

System for en-lt: 'WMT19 EN-LT Unlimited data fetch'
ID: smt-e62e8863-dfb2-41e3-92d4-0d2e9a010cd2

System for sv-en: 'Circle K Data Fetch'
ID: smt-d4531484-8cc9-45a9-89db-35f3d14cbfce

System for lv-ru: 'ERAF-MT2 • Mono TIESLIETU jomas korpusi '
ID: smt-6717b290-947d-4389-bf2c-164603e4ce2b

System for en-lv: 'English - Latvian IT (for ORACLE experiment 2)'
ID: smt-b31e32e5-c83c-4de5-b14a-41d4755603dc

System for es-en: 'Spanish - English (Doppler Labs) - v1.3'
ID: smt-548f46b6-38bb-4fdf-b328-aa81546fee8a

System for en-lt: 'English - Lithuanian IT (Philips) v5'
ID: smt-9442ad67-6422-4c3a-b376-2735e064dd29

System for bg-en: 'EU Presidency Bulgarian-English NMT System (initial)'
ID: smt-5831a8d1-9657-4c45-b657-9797fc4ba8e2

System for en-ro: 'Romanian Presidency - dummy system'
ID: smt-85756325-c426-4659-85aa-405a79094bca

System for en-nl: 'English - Dutch (NMT) Lynx'
ID: smt-8fc59d9e-5566-4e35-af4b-98382578cdf2

System for de-fr: 'German - French (NMT) web-tuned Hieronymus'
ID: smt-1ebfb927-c218-45bf-9313-e9cacfd1350a

System for en-ja: 'English-Japanese MT -- do we have data for this?'
ID: smt-f8beacdc-6472-48e0-adfc-bcfee293d1d2

System for en-et: '[EE EU Presidency 2017] EN-ET - CyberSecurity'
ID: smt-88898ff2-bf11-4972-b980-50c49d9d295a

System for en-fr: 'EN-FR (Canadian) System'
ID: smt-0bc72257-75c3-4064-bd8f-31bfa61aa9d0

System for en-cs: 'Presto EN->CS auto-moto'
ID: smt-36bedbaa-b155-43af-b347-5d9c03832acb

System for en-lv: 'English - Latvian IT (EN-LV v5.4)'
ID: smt-167d7a46-5aec-4d4d-8fdd-6508ddbe6eb8

System for et-en: 'Estonian-English Sockeye NMT System (WMT 2018)'
ID: smt-9ac1c0de-233b-4614-ac69-b1a092bb8f9e

System for en-et: '[EE EU Presidency 2017] EN-ET - Presidency'
ID: smt-978024f2-0701-434d-9e64-6f62ed78a59f

System for en-lv: 'Demonstration System'
ID: smt-2ad189ca-4818-477d-9df6-2ed9321102d3

System for en-nl: 'LYNX EN-NL dummy '
ID: smt-3f2f3a32-0dc7-4f76-9855-a75de937e6f8

System for en-lv: 'Linearis'
ID: smt-b4fc9d46-fd63-45b7-94f4-ab4ac03fa31f

System for en-pl: 'EN-PL Getit-NMT/Google evaluation'
ID: smt-2f15ddda-47fd-4613-9efd-aa9e274b4e32

System for ru-lt: 'Dummy'
ID: smt-74a9bdf7-6a4d-4d05-8b11-52becbba4691

System for en-es: 'ITU - Telecommunications EN-ES NMT'
ID: smt-cf7ff971-ec56-40e2-bb32-1a03973fa56f

System for en-pl: '[BIRETA] EN-PL Data Fetch'
ID: smt-589477c0-85c6-4915-9dc9-5291b9dd3c6e

System for lv-en: 'Rail Baltica Lv-En'
ID: smt-a2980bfc-4c71-47d6-aa70-0d101f8ed287

System for en-lv: 'EESC corpus test baseline. Europarl only'
ID: smt-81d0377e-15a8-48dd-a0d1-1d3afbc1e622

System for de-en: 'mono-translated.de-en - corpus clean'
ID: smt-a97cea8d-3111-488b-84cd-8ca563b5ad8e

System for en-sv: 'CircleK En-Sv Dummy'
ID: smt-8bcf699c-1949-49c6-8c15-d0f8a79336dd

System for et-en: 'Estonian - English (General) v2.1 - Copy'
ID: smt-1616f2f1-40c5-4443-9d5a-721a98046cd0

System for en-es: 'Chess English-Spanish Data Fetch'
ID: smt-bf18fc36-d13b-4af0-a829-4bbebe9b88f2

System for en-pl: 'English - Polish Clinical trials (NMT) Kontekst'
ID: smt-eb12fe16-c5e4-471b-81d6-9528ec41aa45

System for nl-en: 'FREME Dutch-English Legal'
ID: smt-9b69c17c-7c51-4cc9-94d7-8160460c9bc4

System for lv-de: 'LV-DE dummy'
ID: smt-19229494-a44f-4a40-84c2-cea6e30fa00b

System for en-lt: 'English - Lithuanian IT (v6) - Copy'
ID: smt-867a0958-e380-4215-a1a9-0ae790aef9af

System for en-lv: 'Oracle'
ID: smt-8f35f4fb-37ae-4e19-b86a-fc931f430a7b

System for lv-ru: 'LV-RU (Valsts pārvalde) baseline + with-for MultiUN LV-RU evaluation - Copy'
ID: smt-7777f5da-3a19-40c9-98c4-8c580825aca3

System for en-nb: 'STP EN-NO NMT'
ID: smt-67bfee6d-94e2-4a72-9b41-95dea91c136e

System for en-pl: 'EVAL EN-PL - GetIT'
ID: smt-2509b5cf-cbb6-4c23-8931-c80463b455db

System for ru-lt: 'Russian - Lithuanian (NMT) LT-MT3'
ID: smt-46a7fe0e-e1d3-4fd0-ab21-f6d40db609f3

System for pl-en: 'GET IT - Pl-En - NMT'
ID: smt-59d71f23-a43b-442a-84c3-327aedfcfec6

System for et-fi: 'Estonian - Finnish (NMT) v1'
ID: smt-65c04708-bf14-49d6-89b0-d81d6d5b7161

System for lv-de: 'Latvian - German (NMT) v2'
ID: smt-867c0dcd-1ae3-4a9f-9dbc-17f8fd956e52

System for es-ro: '_SeproTec: Spanish-Romanian'
ID: smt-2f22196a-66a4-4709-acfd-7b57c57bc911

System for en-lv: 'Test System'
ID: smt-1cd5bac8-45f3-4b5b-a79d-17985e9d3774

System for et-en: 'TB Estonian - English NMT'
ID: smt-fea9837f-2179-4057-be22-e5cf1c4a316e

System for fr-en: 'FR-EN Baseline NMT'
ID: smt-c24d97d3-536b-4e2d-8810-f2e46a4a897d

System for lt-en: 'LT-MT3 LT-EN IT domain fetch'
ID: smt-4093256f-c36f-47d1-ad63-c8895c4e8578

System for en-lt: 'English - Lithuanian IT'
ID: smt-94aa3716-9ac3-4588-bf6b-b838145feeff

System for lv-en: 'TB2016 LV-EN v01.1 Date localisation'
ID: smt-2830e860-c135-4f5c-917c-7d3d669f9419

System for fr-sv: 'TextMinded fr-sv - v1'
ID: smt-3d4fc9fa-08c8-4402-8d09-6b0bde84d20e

System for en-lv: 'EESC corpus test. JRC Acquis + EESC'
ID: smt-2e246eb8-ba9b-4a28-b7b6-a1fa0b6c865a

System for en-pl: 'EVAL EN-PL'
ID: smt-ad84638a-5e79-4be9-9f7d-5d550bfae42e

System for en-ar: 'AP English-Arabic v1'
ID: smt-d7570198-5d62-4700-8af1-7f96ad711557

System for en-lv: 'English - Latvian IT (EN-LV v5.2)'
ID: smt-2c4b290c-76a3-45ee-8096-0d5b167ebfd2

System for ja-en: 'Japanese - English (NMT)'
ID: smt-5dab723e-195e-468d-a7b5-69e6e63fe2aa

System for lt-pl: 'LT-MT3 LT-PL Data Fetch'
ID: smt-b0f78069-4d5f-40b9-9508-4638f02bd591

System for en-da: 'Amesto EN-DK baseline eval using client's eval corpus'
ID: smt-38eca5c1-f060-4ae1-ad22-1aa8230bf5b5

System for en-ro: 'English - Romanian (NMT) Presidency'
ID: smt-d57b1605-598b-46a8-8ad9-4b8e2499b9cf

System for en-lt: 'English - Lithuanian (General)'
ID: smt-9ec7846e-23d2-4f6a-82d2-fa7be380b19a

System for en-ga: 'en-ga-test'
ID: smt-c836202f-7400-4145-b5b1-dcbafc5d8448

System for lv-en: 'Latviešu - Angļu (Vispārēja) - Sockeye NMT'
ID: smt-707fe5ce-98f4-46ae-b01a-03070a0db25c

System for lt-ru: '[LT-MT3] Systems to build data for Probability dictionaries'
ID: smt-7d77024a-c759-4b0b-8868-129e54316625

System for de-lt: 'German - Lithuanian (NMT) LT-MT3'
ID: smt-8b22a71a-00e9-4e10-88f0-d68e9530da59

System for en-lv: 'English - Latvian IT (v7.0) - for NMT training'
ID: smt-4e4fa6e2-abba-4056-ab81-6e407d077b01

System for et-en: 'Estonian - English (General) v2.2'
ID: smt-0e4c813d-e2be-4f2d-8889-d6bb99d36099

System for lv-en: 'Linearis LV-EN - data fetch'
ID: smt-59bc1198-606e-4a36-8c3f-bb07722f6aa2

System for en-lv: 'ERAF-MT2 • Mono TIESLIETU jomas korpusi'
ID: smt-00f9a76e-7b4d-48a3-b1b7-07be4f172db5

System for en-et: 'Oracle'
ID: smt-9030dfed-1121-49e0-a427-2e648bf59feb

System for en-sl: 'Amidas En-Sl'
ID: smt-abe37866-81f2-4863-a358-b620ad8a4160

System for et-fi: 'Et-Fi Dummy system'
ID: smt-5121ff36-70a2-4f4e-b37c-0a88c32973fd

System for en-et: 'EN-ET-Marian-NMT - Updated Files'
ID: smt-73a303d6-6e20-4869-89cc-a275304357bb

System for en-lv: 'TB2015 EN-LV v02.1'
ID: smt-505293d9-9b8f-441f-bea4-d830f179ee18

System for en-et: 'Celsius EN-ET-SMT'
ID: smt-6e36d478-f2ce-4ad5-9b02-ab8da68947d7

System for de-lv: 'DE-LV data fetch2'
ID: smt-8b3c5570-8646-477d-a691-5883a3f9bc98

System for en-ar: 'TB English-Arabic v1 - NMT (production)'
ID: smt-c4a3aa54-92b0-48de-88fd-feabdfab6219

System for de-fr: '[Hieronymus] DE-FR In-Domain NMT System'
ID: smt-98090b03-4be1-494e-a771-6d5b33af6078

System for fi-en: 'Stockmann FI-EN data fetch'
ID: smt-c2a94829-e4bf-4264-a1bc-16b1a6d63795

System for nl-en: 'FREME Dutch-English Legal - v1.1'
ID: smt-12fc5829-3071-4eec-b07d-d7abb8ae1bc7

System for lv-en: 'Latviešu - Angļu (Mežu nozares)'
ID: smt-6b3b1e56-84c9-43f6-86c9-cf18e946b1bd

System for en-fi: 'STP EN-FI NMT (preliminary)'
ID: smt-0e03379f-cfb3-46ba-9397-e6c6c901e808

System for en-sk: 'Codex Global - English-Slovak SMT'
ID: smt-353633c8-aee0-4f41-b7c2-558944d6d5cc

System for fi-et: 'Finnish - Estonian (NMT) v1'
ID: smt-e3f36b28-ef22-42f8-a4fe-da48acd41418

System for en-et: 'English - Estonian IT with Dynamic Learning Enabled'
ID: smt-e15e39d7-61fd-49f2-a50f-d232fb76d4a9

System for en-lt: 'English - Lithuanian IT (v5.7) (Oracle)'
ID: smt-2c5c8950-a46c-4a2f-8f5f-8e1920e62bdd

System for lt-en: 'Lithuanian - English (NMT) IT LT-MT3'
ID: smt-88f0a69d-ad3d-4b6d-bd02-abc89cbecc85

System for en-lt: 'Seimas - En-Lt - NMT'
ID: smt-0964dacf-d3e2-4b00-b36d-f9bfda082389

System for lt-en: 'LT-EN (General) NMT'
ID: smt-67631dc9-0bb7-4999-b12d-eac6ae189db9

System for en-et: 'English - Estonian IT (v5.3)'
ID: smt-effb3bd7-08e3-40b3-9bad-da62b4e394b9

System for en-lv: 'TB2015 EN-LV v02.1'
ID: smt-5599bc15-6f44-4b89-a465-4930fb2ba47f

System for en-lt: 'English - Lithuanian IT (v6.0) (Oracle) – Decode XML entities' ON – bez vecajiem LocDoc'
ID: smt-ab369469-901f-49e8-a852-b4614d376695

System for lv-en: 'ERAF-MT2 angļu kultūra'
ID: smt-4da2a903-6e2a-4ab8-961b-f8c1b8e5e16d

System for de-lt: '[LT-MT3] Systems to build data for Probability dictionaries '
ID: smt-0deac4c5-3131-49c1-ae34-0bccca424c71

System for en-et: '[OBSOLETE] inglise - eesti NMT system'
ID: smt-35abecbd-565f-44e3-9999-b6decc5a6eac

System for da-en: 'Danish - English (NMT) AGA'
ID: smt-8c11e4ce-91cc-4ae0-8cac-f0fea4302361

System for en-sv: 'English - Swedish (NMT) CircleK'
ID: smt-c6bb32b3-5d1a-456a-89bf-698a397ad736

System for lt-ru: 'Lt-Ru (General) NMT'
ID: smt-41d3bdfd-e58a-4879-917b-9cb9c5705888

System for en-et: 'Automotive'
ID: smt-ceedf681-ecfd-469e-b737-a252975bffd4

System for da-en: 'Yet another DA-EN baseline v2'
ID: smt-aebcc383-65c6-499b-bfe6-6d5e2a10fe81

System for lv-en: 'Linearis - Lv-En - NMT'
ID: smt-d1455a59-2299-4eb2-b87f-f29071ea9815

System for en-pl: 'English - Polish INTERTEXT Legal & Finance (NMT)'
ID: smt-7fc23609-86f7-4199-a438-1e2e902f4bd3

System for fr-lt: 'French - Lithuanian (NMT) Legal LT-MT3'
ID: smt-690a7c17-c78b-47c4-9127-7230f6e0db32

System for en-lt: 'English - Lithuanian IT (v5.6) (Oracle)'
ID: smt-d7304849-f9c7-4320-87fd-4124427926ab

System for en-lt: 'English - Lithuanian (NMT) WMT 2019'
ID: smt-31f11613-ab4d-43d9-a828-88a98e49d4af

System for en-ro: 'Romanian Presidency - backtranslation data fetch 1'
ID: smt-c8ded83e-8906-4000-8227-2a44a4eaca9a

System for lv-de: 'Latvian - German (NMT)'
ID: smt-f967086d-18e2-4bf3-a4c7-467ab2079598

System for en-lv: 'TB2016 EN-LV v0.1 (less data)'
ID: smt-4bf418df-aaba-4e7e-8047-a2ad55a2b382

System for de-fr: 'German - French (NMT) Hieronymus'
ID: smt-2dd7addb-7a9e-4c1c-8e99-bd2fb12b9245

System for ru-lt: 'Seimas - Ru-Lt - NMT'
ID: smt-617f16f7-1a2c-4305-90a1-70a13bc04f81

System for fi-et: 'FI-ET v0.1'
ID: smt-578d3487-2b96-4032-8b43-a9bd8a583f0a

System for et-en: '[EE EU Presidency 2017] ET-EN - Presidency'
ID: smt-3e62fc51-6df8-4808-9a7f-2149f5fdc4dd

System for pl-en: 'Polish - English (NMT) Diuna (Law & Finance)'
ID: smt-e20e02e8-97c8-40cf-b8de-6db214fd64eb

System for en-lt: 'English - Lithuanian (NMT) IT LT-MT3'
ID: smt-2212985c-28b4-4d84-a1f4-f00ac332874f

System for ja-en: 'Japanese-English SMT'
ID: smt-85b341b2-c40a-4d1b-9297-f247b4f6c22a

System for en-fr: 'EN-FR Baseline NMT'
ID: smt-9fc9ddbe-a714-4281-9e7b-3ebc55e1fad6

System for en-da: 'Yet another EN-DA baseline'
ID: smt-c71c7ccd-37a2-401c-9447-6f18b75cf18a

System for en-ro: 'ro-en-test'
ID: smt-61b820f7-b955-4676-b517-09e32898fd51

System for en-lt: 'English - Lithuanian (NMT) Seimas v2'
ID: smt-df7a882a-1146-4fad-8422-31158a025556

System for de-en: 'DE-EN General System for Kothes - Copy'
ID: smt-93fbccee-71b2-4101-87ad-12c335c27652

System for pl-en: 'Polish - English General (NMT)'
ID: smt-c9b2c200-2370-4e1d-a1da-9e1f489a4217

System for fr-lt: 'FR-LT LT-MT3 - Comparable SMT System'
ID: smt-eb5873b3-9211-496c-9102-0fc2201572d4

System for en-pl: 'English - Polish Pharmacy (NMT) Kontekst'
ID: smt-4ee45ca7-c633-4a43-85f5-dbca960b822d

System for en-lv: 'ODINE - OPUS EMEA test. OPUS EMEA only'
ID: smt-1c96aedd-b80f-414a-974d-f77feb8e1fc7

System for de-fr: 'Hieronymus DE-FR'
ID: smt-71b92f58-8563-48e7-bc16-cb895958aa3b

System for en-bg: 'EU Presidency English-Bulgarian NMT (initial)'
ID: smt-d44cd645-bc44-4c65-820e-65c4425a0f46

System for lt-en: 'WMT19 LT-EN Data Fetch'
ID: smt-d861be41-8b0b-4ba7-a490-5655ef8fd623

System for en-da: 'Atea - English-Danish NMT'
ID: smt-c3c51e85-7c13-4b70-b314-977890e32fa4

System for en-lv: 'EESC corpus test baseline. DCEP only'
ID: smt-ba6a6583-94e5-431e-b3f4-ac88868450e8

System for fr-lt: 'FR-LT LT-MT3 - data fetch'
ID: smt-edd16e25-c09b-43ad-8884-1dfae01fd2da

System for en-pl: 'Diuna Polish Patents (EN-PL)'
ID: smt-0ea25eca-efae-47b2-8bfb-6d0f1fa3619d

System for en-de: 'Austrian presidency mono data Only good newlines - German'
ID: smt-b408b9d6-d50a-4418-83fa-7a3b8b24b669

System for en-cs: 'English - Czech (NMT) Presto'
ID: smt-5c06cd10-bb5e-43b1-9889-dc40285e3756

System for lv-ru: 'Tests #1 LV-RU 'sinonīmu vārdnīcas' izveidei'
ID: smt-bb583163-9172-4e72-a913-2d1695fd5830

System for en-lt: 'WMT19 EN-LT SMT Baseline V2'
ID: smt-bc6d1bad-1b46-4307-abb9-b95f037d69cd

System for lt-ru: 'LT-MT3 LT-RU Data Fetch'
ID: smt-999f76d0-6ab2-4e2b-ae1d-c575a69012ec

System for lv-en: 'TB Latvian - English (v03)'
ID: smt-f59d9946-924e-47a7-a136-2fe66cfb77ef

System for en-es: 'Printful English-Spanish Data Fetch'
ID: smt-39d8d5f2-8612-4b49-9348-77437a196f82

System for et-en: 'EU Presidency NMT system (ET-EN)'
ID: smt-3ea525f9-e9b4-4b30-8c14-febb66e40f6e

System for ro-en: 'Romanian Presidency data fetch'
ID: smt-28c52ba8-cfb4-424b-8b65-cf43224540a2

System for nl-en: 'Dutch - English (NMT) Lynx'
ID: smt-2eb02c32-1406-45a0-8974-0310becf564b

System for en-fr: 'EN-FR (Canadian) TRSB Adapted System - NMT'
ID: smt-454b7e1f-c3dc-486a-9e63-1541ac8aef50

System for en-es: 'EN-ES [SMT]'
ID: smt-e97495e3-559a-4395-b224-d29e924f7f7c

System for lt-ru: 'IADAATPA LT-RU'
ID: smt-802b1632-95fb-48de-8cd7-e9ddfd116853

System for en-de: 'Codex Global - English-German SMT'
ID: smt-08cd7e6a-6aae-40fb-beba-5101c7b1c5af

System for en-lv: 'EN-LV 2019 data fetch'
ID: smt-c6923cd7-14c9-46ce-995e-bedfc3a1a636

System for lt-fr: 'LT-FR Legal LT-MT3 - data fetch '
ID: smt-2094cec5-9ab2-431f-95ec-11f8d4dff4d4

System for de-en: 'De -En  [NMT]'
ID: smt-160de000-f719-4d5b-9daa-34859345e889

System for pl-en: 'Diuna Law & Finance [data fetch]'
ID: smt-50d1a6be-6e39-40fd-8e7a-47250a1edb4e

System for en-lv: 'Angļu - Latviešu (Vispārēja)'
ID: smt-643d5907-82c2-4a1a-849d-ba7822aef036

System for en-pl: 'English - Polish Patents (NMT)'
ID: smt-90034e3b-fbe3-4f88-8d4d-ff37f3699fe0

System for en-lv: 'ERAF-MT EN-LV (Valsts pārvalde) v0.3'
ID: smt-c31ba483-3284-4a28-bb18-2abbbbb3cd05

System for en-ru: 'CHESS game strategy, notes, comments'
ID: smt-bb9115be-804b-42f4-a52b-8008470c2a21

System for en-sl: 'English-Slovenian'
ID: smt-f9686767-aadd-4a5d-8d31-4daf53dd77a9

System for ru-lv: 'TB RU-LV v.02'
ID: smt-95f5a52c-86fe-467a-b15e-4058df157cf9

System for en-pl: 'Kontekst data fetch'
ID: smt-1dd3547e-0929-4985-a4d3-36a6a2218590

System for en-es: 'English - Spanish (NMT) Lynx'
ID: smt-7f098605-5838-4f84-b73e-94af698c3e00

System for en-fr: 'EN-FR Baseline NMT (fast)'
ID: smt-a5f350f1-351c-4767-a12c-5a7fe5c2dfb0

System for fr-en: 'FR-EN Baseline NMT (fast)'
ID: smt-14c7a262-824b-46c1-8d90-841aa38429b9

System for en-et: '[EE EU Presidency 2017] EN-ET - General Domain v2'
ID: smt-a0623ae4-b245-4e4e-a2ee-0c3b29684020

System for en-et: 'Tilde Localization. English-Estonian IT'
ID: smt-e3a26b6a-a6e6-4b7f-b535-cfee07caac8c

System for en-lv: 'ERAF-MT2 • Paralēlie EN-LV-EN korpusi'
ID: smt-afc76215-6de9-44ec-9ee7-e5562dd2bb15

System for en-pl: 'INTERTEXT Polish Medical (EN-PL)'
ID: smt-3339aff2-ffbd-469b-9fb6-483885c0afd6

System for pl-en: 'Polish - English - GetIT - v9 - Legal NMT'
ID: smt-d03e04f7-d662-4e0a-b9ac-f58e57ff55a7

System for de-en: 'Technik DE_ENGB v2'
ID: smt-39f2a60c-d184-4450-a397-3aea517875ca

'''

Monday, March 2, 2026

Webscrape with Tor and Python

Technical Implementation of Tor-Based Web Scraping

Setting Up the Tor Environment

The foundation of Tor-based web scraping requires proper configuration of the Tor network environment. The primary setup involves installing the Tor service and configuring the SOCKS5 proxy settings. Install Tor using the package manager:

sudo apt-get install tor

For enhanced control over Tor connections, modify the torrc configuration file located at /etc/tor/torrc:

SOCKSPort 9050
ControlPort 9051
HashedControlPassword your_hashed_password

To enable automatic IP rotation, add these parameters (rotating-tor-http-proxy):

MaxCircuitDirtiness 60
NewCircuitPeriod 30

Implementing Python Tor Controllers

The stem library provides programmatic control over Tor processes. Here's a basic implementation:

from stem import Signal
from stem.control import Controller


def renew_tor_ip():
    """Ask the local Tor daemon for a new identity (fresh circuit / exit IP).

    Connects to Tor's control port (9051) and sends the NEWNYM signal.
    Assumes the control port is open and authentication succeeds with the
    configured method (cookie or HashedControlPassword).
    """
    with Controller.from_port(port=9051) as controller:
        controller.authenticate()
        controller.signal(Signal.NEWNYM)

For handling HTTP requests through Tor, implement a session manager:

import requests


def create_tor_session():
    """Return a requests Session whose traffic is routed through local Tor.

    Uses the ``socks5h`` scheme (not plain ``socks5``) so that DNS
    resolution also happens inside Tor, preventing DNS leaks.
    """
    session = requests.session()
    session.proxies = {
        'http': 'socks5h://localhost:9050',
        'https': 'socks5h://localhost:9050'
    }
    return session

Multi-threaded Tor Scraping Architecture

Implementing a multi-threaded architecture enhances scraping efficiency while maintaining anonymity (TorScraper):

from concurrent.futures import ThreadPoolExecutor
import queue


class TorScraperPool:
    """Thread pool that drains a FIFO queue of URLs, scraping them concurrently.

    The original snippet submitted ``self._scrape_url`` without ever defining
    it, so ``process_urls`` raised AttributeError; a stub is provided here
    that subclasses override with the actual (Tor-routed) fetch logic.
    """

    def __init__(self, max_workers=5):
        # Bounded worker pool keeps the number of concurrent Tor streams small.
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        self.url_queue = queue.Queue()

    def add_url(self, url):
        """Enqueue a URL for later scraping."""
        self.url_queue.put(url)

    def process_urls(self):
        """Submit every currently-queued URL to the pool.

        Returns the list of Futures; callers collect results/errors from them.
        """
        futures = []
        while not self.url_queue.empty():
            url = self.url_queue.get()
            future = self.executor.submit(self._scrape_url, url)
            futures.append(future)
        return futures

    def _scrape_url(self, url):
        """Fetch one URL. Subclasses must override with real scraping logic."""
        raise NotImplementedError("override _scrape_url with scraping logic")

Error Handling and Circuit Management

Robust error handling is crucial for maintaining stable Tor connections:

import time


class TorCircuitManager:
    """Retry wrapper that rotates the Tor circuit after each failed attempt.

    The article's snippet used ``time.sleep`` without importing ``time``;
    the import is added here. ``renew_tor_ip`` is the helper defined earlier
    in this article.
    """

    def __init__(self, max_retries=3):
        self.max_retries = max_retries

    def execute_with_retry(self, func):
        """Call ``func()``; on failure, rotate the circuit and try again.

        Re-raises the last exception once ``max_retries`` attempts have
        failed. Returns ``func()``'s result on the first success.
        """
        retries = 0
        while retries < self.max_retries:
            try:
                return func()
            except Exception:
                retries += 1
                if retries == self.max_retries:
                    raise
                self._handle_circuit_error()
        # Only reached when max_retries <= 0: nothing was ever attempted.
        return None

    def _handle_circuit_error(self):
        # Request a new Tor identity, then pause so the fresh circuit
        # can be established before the next attempt.
        renew_tor_ip()
        time.sleep(5)  # Allow circuit establishment

Performance Optimization and Rate Limiting

Implement intelligent rate limiting to avoid detection while maintaining performance:

class RateLimiter:
    """Counts requests and signals when the Tor circuit should be rotated."""

    def __init__(self, requests_per_circuit=10):
        # How many requests to send through one circuit before rotating.
        self.requests_per_circuit = requests_per_circuit
        self.current_requests = 0

    def should_rotate_circuit(self):
        """Record one request; return True every ``requests_per_circuit`` calls.

        The counter resets whenever True is returned, so rotation happens
        periodically rather than once.
        """
        self.current_requests += 1
        if self.current_requests >= self.requests_per_circuit:
            self.current_requests = 0
            return True
        return False

Configure dynamic delays based on server response patterns:

import random


def calculate_delay(response_time):
    """Return a polite inter-request delay in seconds.

    ``response_time`` is the server's last response time in seconds:
    slow responses (> 5s) suggest server load, so the delay doubles;
    otherwise a small random jitter makes the request cadence less
    bot-like. (The original snippet used ``random`` without importing it.)
    """
    base_delay = 2
    if response_time > 5:
        return base_delay * 2
    return base_delay + random.uniform(0, 1)

The technical implementation focuses on creating a robust, scalable system that maintains anonymity while efficiently scraping data. The architecture supports multiple concurrent connections while implementing necessary safety measures to prevent detection and ensure reliable data collection.

This implementation provides a foundation for building sophisticated scraping systems that can handle various scenarios while maintaining anonymity through the Tor network. The modular design allows for easy expansion and customization based on specific scraping requirements.

Try out ScrapingAnt's residential proxies with millions of IP addresses across 190 countries!

Security and Performance Optimization in Anonymous Web Scraping

Advanced TOR Configuration for Enhanced Privacy

TOR's effectiveness in web scraping depends significantly on proper configuration. The default configuration often leaves security gaps that could compromise anonymity. Implementing advanced TOR configurations can enhance security:

# Example of advanced TOR configuration
# Route both HTTP and HTTPS through Tor's local SOCKS proxy; the
# "socks5h" scheme resolves DNS inside Tor as well, avoiding DNS leaks.
proxies = {
'http': 'socks5h://127.0.0.1:9050',
'https': 'socks5h://127.0.0.1:9050'
}
# Control port used to issue NEWNYM (new identity) signals.
control_port = 9051
# Hashed control-port password (see HashedControlPassword in torrc).
password = "your_password_hash"

Implementing proper authentication and control port settings can increase security by up to 40%. Essential configurations include:

  • Enabling Stream Isolation
  • Implementing DNS leak protection
  • Configuring custom exit node selection
  • Setting up bridge relays for additional anonymity

Intelligent Rate Limiting and Request Management

Sophisticated rate limiting strategies are crucial for maintaining anonymity while optimizing performance. Research from ScrapingAnt shows that intelligent rate limiting can increase success rates by up to 95% compared to unrestricted scraping.

Key implementation aspects include:

import random


async def adaptive_rate_limiter(response_time):
    """Compute an adaptive delay in seconds, capped at 10.

    ``response_time`` is in milliseconds: the base delay is scaled
    proportionally to it, and a small random jitter is added so request
    timing is not perfectly regular. (The original snippet used
    ``random`` without importing it.)
    """
    base_delay = 2.0
    jitter = random.uniform(0.1, 0.5)
    dynamic_delay = base_delay * (response_time / 1000)
    return min(dynamic_delay + jitter, 10.0)
  • Dynamic delay calculation based on server response times
  • Randomized intervals between requests
  • Adaptive throttling based on server load
  • Circuit switching optimization

Memory-Optimized Data Handling

Efficient memory management is critical when handling large datasets through TOR. Implementation of memory-efficient techniques can reduce RAM usage by up to 60%:

def stream_process_data(url, chunk_size=1024):
    """Stream *url* through the Tor proxy, processing one chunk at a time.

    stream=True keeps the response body out of memory until iterated, so
    arbitrarily large downloads fit in constant RAM.
    """
    with requests.get(url, stream=True, proxies=tor_proxies) as response:
        for chunk in response.iter_content(chunk_size=chunk_size):
            process_chunk(chunk)

Key optimization strategies include:

  • Implementing generator-based data processing
  • Using chunked transfer encoding
  • Employing memory-mapped files for large datasets
  • Implementing data compression during transfer

Circuit Management and IP Rotation

Advanced circuit management techniques can significantly improve scraping reliability while maintaining anonymity. According to Bored Hacking, proper circuit management can reduce detection rates by up to 75%:

def rotate_circuit():
    """Ask Tor for a fresh circuit (new exit IP) via the control port."""
    with Controller.from_port(port=9051) as controller:
        controller.authenticate()
        # NEWNYM signals Tor to switch to clean circuits for new streams.
        controller.signal(Signal.NEWNYM)
        # Respect Tor's built-in rate limit before the next rotation.
        time.sleep(controller.get_newnym_wait())

Implementation considerations include:

  • Automated circuit rotation based on usage patterns
  • Exit node country selection
  • Circuit build timeout optimization
  • Parallel circuit preparation

Concurrent Request Optimization

Implementing concurrent requests while maintaining anonymity requires careful balance. Research indicates that properly configured concurrent requests can improve performance by up to 300% without compromising security:

async def concurrent_scraper(urls, max_concurrent=5):
    """Fetch *urls* concurrently with at most *max_concurrent* in flight.

    A semaphore bounds concurrency; one shared aiohttp session reuses
    connections across all requests.
    """
    semaphore = asyncio.Semaphore(max_concurrent)
    async with aiohttp.ClientSession() as session:
        tasks = [
            asyncio.create_task(fetch_with_semaphore(semaphore, session, url))
            for url in urls
        ]
        return await asyncio.gather(*tasks)

Key aspects include:

  • Implementing connection pooling
  • Managing concurrent circuit creation
  • Optimizing resource allocation
  • Implementing request queuing and prioritization

The implementation of these security and performance optimizations must be carefully balanced to maintain anonymity while achieving acceptable performance levels. Regular monitoring and adjustment of these parameters ensure optimal operation as network conditions and target site behaviors change. Source: https://scrapingant.com

Monday, February 23, 2026

llama-cpp-python for HuggingFace Spaces

 # llama-cpp-python Prebuilt Wheels for HuggingFace Spaces (Free CPU)

Prebuilt `llama-cpp-python` wheels optimized for HuggingFace Spaces free tier (16GB RAM, 2 vCPU, CPU-only).

## Purpose

These wheels include the latest llama.cpp backend with support for newer model architectures:
- **LFM2 MoE** architecture (32 experts) for LFM2-8B-A1B
- Latest IQ4_XS quantization support
- OpenBLAS CPU acceleration

## Available Wheels

| Wheel File | Python | Platform | llama.cpp | Features |
|------------|--------|----------|-----------|----------|
| `llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl` | 3.10 | Linux x86_64 | Latest (Jan 2026) | LFM2 MoE, IQ4_XS, OpenBLAS |

## Usage

### Setting Up HuggingFace Spaces with Python 3.10

These wheels are built for **Python 3.10**. To use them in HuggingFace Spaces:

**Step 1: Switch to Docker**
1. Go to your Space settings
2. Change "Space SDK" from **Gradio** to **Docker**
3. This enables custom Dockerfile support

**Step 2: Create a Dockerfile with Python 3.10**

Your Dockerfile should start with `python:3.10-slim` as the base image:

```dockerfile
# Use Python 3.10 explicitly (required for these wheels)
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc g++ make cmake git libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Install llama-cpp-python from prebuilt wheel
RUN pip install --no-cache-dir \
    https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl

# Install other dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV GRADIO_SERVER_NAME=0.0.0.0

# Expose Gradio port
EXPOSE 7860

# Run the app
CMD ["python", "app.py"]
```

**Complete Example:** See the template below for a production-ready setup.

### Why Docker SDK?

When you use a custom Dockerfile:
- ✅ Explicit Python version control (`FROM python:3.10-slim`)
- ✅ Full control over system dependencies
- ✅ Can use prebuilt wheels for faster builds
- ✅ No need for `runtime.txt` (Dockerfile takes precedence)

### Dockerfile (Recommended)

```dockerfile
FROM python:3.10-slim

# Install system dependencies for OpenBLAS
RUN apt-get update && apt-get install -y \
    gcc g++ make cmake git libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Install llama-cpp-python from prebuilt wheel (fast)
RUN pip install --no-cache-dir \
    https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl
```

### With Fallback to Source Build

```dockerfile
# Try prebuilt wheel first, fall back to source build if unavailable
RUN if pip install --no-cache-dir https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl; then \
    echo "✅ Using prebuilt wheel"; \
    else \
    echo "⚠️  Building from source"; \
    pip install --no-cache-dir git+https://github.com/JamePeng/llama-cpp-python.git@5a0391e8; \
    fi
```

## Why This Fork?

These wheels are built from the **JamePeng/llama-cpp-python** fork (v0.3.22) instead of the official abetlen/llama-cpp-python:

| Repository | Latest Version | llama.cpp | LFM2 MoE Support |
|------------|---------------|-----------|-----------------|
| JamePeng fork | v0.3.22 (Jan 2026) | Latest | ✅ Yes |
| Official (abetlen) | v0.3.16 (Aug 2025) | Outdated | ❌ No |

**Key Difference:** LFM2-8B-A1B requires llama.cpp backend with LFM2 MoE architecture support (added Oct 2025). The official llama-cpp-python hasn't been updated since August 2025.

## Build Configuration

```bash
CMAKE_ARGS="-DGGML_OPENBLAS=ON -DGGML_NATIVE=OFF"
FORCE_CMAKE=1
pip wheel --no-deps git+https://github.com/JamePeng/llama-cpp-python.git@5a0391e8
```

## Supported Models

These wheels enable the following IQ4_XS quantized models:

- **LFM2-8B-A1B** (LiquidAI) - 8.3B params, 1.5B active, MoE with 32 experts
- **Granite-4.0-h-micro** (IBM) - Ultra-fast inference
- **Granite-4.0-h-tiny** (IBM) - Balanced speed/quality
- All standard llama.cpp models (Llama, Gemma, Qwen, etc.)

## Performance

- **Build time savings:** ~4 minutes → 3 seconds (98% faster)
- **Memory footprint:** Fits in 16GB RAM with context up to 8192 tokens
- **CPU acceleration:** OpenBLAS optimized for x86_64

## Limitations

- **CPU-only:** No GPU/CUDA support (optimized for HF Spaces free tier)
- **Platform:** Linux x86_64 only
- **Python:** 3.10 only (matches HF Spaces default)

## License

These wheels include code from:
- [llama-cpp-python](https://github.com/JamePeng/llama-cpp-python) (MIT license)
- [llama.cpp](https://github.com/ggerganov/llama.cpp) (MIT license)

See upstream repositories for full license information.

## Maintenance

Built from: https://github.com/JamePeng/llama-cpp-python/tree/5a0391e8

To rebuild: See `build_wheel.sh` in the main project repository.

## Related

- Main project: [gemma-book-summarizer](https://huggingface.co/spaces/Luigi/gemma-book-summarizer)
- JamePeng fork: https://github.com/JamePeng/llama-cpp-python
- Original project: https://github.com/abetlen/llama-cpp-python

Thursday, February 5, 2026

Build Translation Models with Transformers mT5

Google's mT5 (multilingual Text-to-Text Transfer Transformer) transformer architecture handles 100+ languages with acceptable accuracy.

Build production-ready translation models using mT5, data preparation, model fine-tuning, evaluation metrics and deployment strategies.

What is mT5 and Why Use It for Translation?

mT5 extends Google's T5 architecture to support multilingual tasks. Unlike BERT or GPT models, mT5 treats every problem as text-to-text conversion. This approach works perfectly for translation tasks.

Key Advantages of mT5 Translation Models

Multilingual Support: mT5 handles 101 languages out of the box. You don't need separate models for each language pair.

Transfer Learning: The model learns patterns across languages. Training on high-resource languages improves low-resource translation quality.

Flexible Architecture: The same model architecture works for translation, summarization, and question answering tasks.

Pre-trained Weights: Google provides pre-trained mT5 models. You start with strong baselines instead of random weights.

Prerequisites and Environment Setup

You need Python 3.8+, PyTorch, and the Transformers library. GPU access speeds up training significantly.

# Install required packages
pip install transformers torch datasets evaluate sacrebleu
pip install accelerate wandb  # Optional: for training acceleration and logging
# Import essential libraries
import torch
from transformers import (
    MT5ForConditionalGeneration, 
    MT5Tokenizer, 
    Trainer, 
    TrainingArguments,
    DataCollatorForSeq2Seq
)
from datasets import Dataset, load_dataset
import evaluate
import numpy as np

Check your GPU setup:

# Verify CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Understanding mT5 Architecture for Translation

mT5 uses an encoder-decoder structure. The encoder processes source text, while the decoder generates target translations.

Text-to-Text Format

mT5 requires specific input formatting. Add task prefixes to guide the model:

# Format examples for different translation directions
def format_translation_input(source_text, source_lang, target_lang):
    """Build the mT5 text-to-text input: task prefix + source sentence."""
    # mT5 expects "translate X to Y: ..." style task prefixes.
    return f"translate {source_lang} to {target_lang}: {source_text}"

# Examples
english_to_french = format_translation_input("Hello world", "English", "French")
spanish_to_english = format_translation_input("Hola mundo", "Spanish", "English")

print(english_to_french)  # "translate English to French: Hello world"
print(spanish_to_english)  # "translate Spanish to English: Hola mundo"

Data Preparation and Preprocessing

Quality training data determines model performance. We'll use the OPUS dataset, which contains millions of parallel sentences.

Loading Translation Datasets

# Load a sample translation dataset
def load_translation_data(language_pair="en-fr", split="train", max_samples=10000):
    """Load an OPUS-100 parallel-corpus split, optionally truncated.

    Returns the (possibly truncated) dataset, or None when loading fails.
    """
    try:
        # Fetch the requested split of OPUS-100 for this language pair.
        corpus = load_dataset("opus100", language_pair, split=split)

        # Cap the number of examples to keep training/evaluation fast.
        if max_samples and len(corpus) > max_samples:
            corpus = corpus.select(range(max_samples))

        return corpus
    except Exception as exc:
        # Best-effort loader: report the failure and signal it with None.
        print(f"Error loading dataset: {exc}")
        return None

# Load English-French translation data
train_data = load_translation_data("en-fr", "train", 5000)
val_data = load_translation_data("en-fr", "validation", 1000)

print(f"Training samples: {len(train_data)}")
print(f"Validation samples: {len(val_data)}")

Data Preprocessing Pipeline

class TranslationDataProcessor:
    """Tokenize parallel translation examples into mT5 model inputs.

    Designed for ``Dataset.map(..., batched=True)``: each call receives a
    batch where ``examples['translation']`` is a *list* of
    ``{lang: text}`` dicts (one per example).
    """

    def __init__(self, tokenizer, source_lang="en", target_lang="fr", max_length=128):
        self.tokenizer = tokenizer
        self.source_lang = source_lang    # key of the source side in each pair
        self.target_lang = target_lang    # key of the target side in each pair
        self.max_length = max_length      # truncation length for both sides

    def preprocess_function(self, examples):
        """Preprocess a batch of translation examples for training."""

        # In batched mode ``examples['translation']`` is a list of dicts,
        # so pull each side out per example. (Indexing the list directly
        # with a language-code string would raise TypeError.)
        pairs = examples['translation']
        source_texts = [pair[self.source_lang] for pair in pairs]
        target_texts = [pair[self.target_lang] for pair in pairs]

        # Prepend the mT5 task prefix to every source sentence.
        inputs = [
            f"translate {self.source_lang} to {self.target_lang}: {text}"
            for text in source_texts
        ]

        # Tokenize the prefixed inputs.
        model_inputs = self.tokenizer(
            inputs,
            max_length=self.max_length,
            truncation=True,
            padding=True,
            return_tensors="pt"
        )

        # Tokenize targets in target-tokenizer mode (required by seq2seq
        # tokenizers that distinguish source and target vocabularies).
        with self.tokenizer.as_target_tokenizer():
            labels = self.tokenizer(
                target_texts,
                max_length=self.max_length,
                truncation=True,
                padding=True,
                return_tensors="pt"
            )

        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

# Initialize tokenizer and processor
tokenizer = MT5Tokenizer.from_pretrained("google/mt5-small")
processor = TranslationDataProcessor(tokenizer, "en", "fr")

# Process datasets
train_dataset = train_data.map(
    processor.preprocess_function, 
    batched=True,
    remove_columns=train_data.column_names
)

val_dataset = val_data.map(
    processor.preprocess_function,
    batched=True, 
    remove_columns=val_data.column_names
)

Fine-tuning mT5 for Translation

Fine-tuning adapts the pre-trained mT5 model to your specific translation task. We'll use Hugging Face's Trainer class for efficient training.

Model Initialization

# Load pre-trained mT5 model
model = MT5ForConditionalGeneration.from_pretrained("google/mt5-small")

# Move model to GPU if available
model = model.to(device)

print(f"Model parameters: {model.num_parameters():,}")
print(f"Model size: {model.num_parameters() * 4 / 1024**2:.1f} MB")

Training Configuration

# Set up training arguments
training_args = TrainingArguments(
    output_dir="./mt5-translation-model",
    eval_strategy="steps",
    eval_steps=500,
    save_steps=1000,
    logging_steps=100,
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    warmup_steps=500,
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="eval_bleu",
    greater_is_better=True,
    fp16=True,  # Enable mixed precision training
    dataloader_pin_memory=True,
    remove_unused_columns=False,
    report_to="wandb",  # Optional: for experiment tracking
)

Evaluation Metrics Setup

# Load BLEU metric for evaluation
bleu_metric = evaluate.load("sacrebleu")

def compute_metrics(eval_preds):
    """Compute corpus BLEU (sacreBLEU) for one evaluation pass."""
    predictions, labels = eval_preds

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)

    # The trainer pads label sequences with -100; swap those for the pad
    # token id so they can be decoded back to text.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # sacreBLEU expects one *list* of references per prediction.
    references = [[ref] for ref in decoded_labels]
    scores = bleu_metric.compute(predictions=decoded_preds, references=references)

    return {
        "bleu": scores["score"],
        "precisions": scores["precisions"],
    }

# Data collator for dynamic padding
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True,
    return_tensors="pt"
)

Training the Model

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Start training
print("Starting training...")
train_result = trainer.train()

# Save the final model
trainer.save_model()
tokenizer.save_pretrained("./mt5-translation-model")

print(f"Training completed!")
print(f"Final training loss: {train_result.training_loss:.4f}")

Training typically takes 2-4 hours on a modern GPU. Monitor the loss curves to ensure the model converges properly.

Model Evaluation and Performance Testing

Proper evaluation reveals model strengths and weaknesses. We'll use BLEU scores and human-like quality assessments.

Automated Evaluation with BLEU

def evaluate_translation_model(model, tokenizer, test_data, device):
    """Score the model on *test_data* with corpus BLEU.

    Returns the BLEU score plus the first five prediction/reference pairs
    for quick manual inspection.
    """
    model.eval()
    predictions, references = [], []

    with torch.no_grad():
        for example in test_data:
            source = example['translation']['en']
            target = example['translation']['fr']

            # Build the mT5 task-prefixed input and tokenize it.
            encoded = tokenizer(
                f"translate en to fr: {source}",
                return_tensors="pt",
                max_length=128,
                truncation=True
            ).to(device)

            # Deterministic beam-search decoding for stable evaluation.
            generated = model.generate(
                **encoded,
                max_length=128,
                num_beams=4,
                early_stopping=True,
                do_sample=False
            )

            predictions.append(
                tokenizer.decode(generated[0], skip_special_tokens=True)
            )
            references.append([target])

    bleu = bleu_metric.compute(predictions=predictions, references=references)

    return {
        "bleu_score": bleu["score"],
        "predictions": predictions[:5],  # first 5 examples for eyeballing
        "references": [ref[0] for ref in references[:5]]
    }

# Load test data
test_data = load_translation_data("en-fr", "test", 500)

# Evaluate model
results = evaluate_translation_model(model, tokenizer, test_data, device)

print(f"BLEU Score: {results['bleu_score']:.2f}")
print("\nSample Translations:")
for i, (pred, ref) in enumerate(zip(results['predictions'], results['references'])):
    print(f"Prediction {i+1}: {pred}")
    print(f"Reference {i+1}: {ref}")
    print("-" * 50)

Quality Assessment Examples

def translate_text(model, tokenizer, text, source_lang="en", target_lang="fr"):
    """Translate one sentence with the fine-tuned model (sampled decoding)."""
    # Build and tokenize the task-prefixed input; `device` is the
    # module-level torch device selected at startup.
    encoded = tokenizer(
        f"translate {source_lang} to {target_lang}: {text}",
        return_tensors="pt",
        max_length=128,
        truncation=True
    ).to(device)

    with torch.no_grad():
        # Beam search combined with temperature sampling, matching the
        # original pipeline's generation settings.
        generated = model.generate(
            **encoded,
            max_length=128,
            num_beams=4,
            temperature=0.7,
            do_sample=True,
            early_stopping=True
        )

    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Test various sentence types
test_sentences = [
    "The weather is beautiful today.",
    "Can you help me find the nearest restaurant?",
    "Machine learning transforms how we solve problems.",
    "I love reading books in my free time.",
    "The meeting has been postponed until tomorrow."
]

print("Translation Quality Examples:")
for sentence in test_sentences:
    translation = translate_text(model, tokenizer, sentence)
    print(f"EN: {sentence}")
    print(f"FR: {translation}")
    print("-" * 60)

Deployment and Production Considerations

Moving from training to production requires optimization for speed and resource usage.

Model Optimization

# Optimize model for inference
def optimize_model_for_inference(model):
    """Put *model* in eval mode and, when available, torch.compile it."""
    # Disable dropout and switch norm layers to inference statistics.
    model.eval()

    # torch.compile exists from PyTorch 2.0 onward; skip on older builds.
    if hasattr(torch, 'compile'):
        return torch.compile(model)
    return model

# Create inference pipeline
class TranslationPipeline:
    """Load a fine-tuned mT5 checkpoint and serve optimized translations."""

    def __init__(self, model_path, device="cuda"):
        self.device = device
        self.tokenizer = MT5Tokenizer.from_pretrained(model_path)
        # Load, move to the target device, then apply inference tweaks.
        loaded = MT5ForConditionalGeneration.from_pretrained(model_path)
        self.model = optimize_model_for_inference(loaded.to(device))

    def translate(self, text, source_lang="en", target_lang="fr", **kwargs):
        """Translate *text*; extra kwargs override the generation defaults."""
        encoded = self.tokenizer(
            f"translate {source_lang} to {target_lang}: {text}",
            return_tensors="pt",
            max_length=128,
            truncation=True
        ).to(self.device)

        # Deterministic beam-search defaults; caller-supplied kwargs win.
        gen_kwargs = {
            "max_length": 128,
            "num_beams": 4,
            "early_stopping": True,
            "do_sample": False,
            **kwargs
        }

        with torch.no_grad():
            outputs = self.model.generate(**encoded, **gen_kwargs)

        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

# Initialize production pipeline
translator = TranslationPipeline("./mt5-translation-model", device)

# Test production pipeline  
sample_text = "Hello, how are you doing today?"
result = translator.translate(sample_text, "en", "fr")
print(f"Production translation: {result}")

API Deployment Example

# Simple Flask API for translation service
from flask import Flask, request, jsonify
import time

app = Flask(__name__)

# Initialize translator (do this once at startup)
translator = TranslationPipeline("./mt5-translation-model")

@app.route('/translate', methods=['POST'])
def translate_api():
    """POST /translate — translate JSON {text, source_lang, target_lang}."""
    try:
        payload = request.get_json()

        text = payload.get('text', '')
        source_lang = payload.get('source_lang', 'en')
        target_lang = payload.get('target_lang', 'fr')

        # Reject empty or missing text up front with a client error.
        if not text:
            return jsonify({'error': 'Text parameter is required'}), 400

        # Time the translation so clients can observe service latency.
        started = time.time()
        translation = translator.translate(text, source_lang, target_lang)
        elapsed = time.time() - started

        return jsonify({
            'translation': translation,
            'source_lang': source_lang,
            'target_lang': target_lang,
            'processing_time': round(elapsed, 3)
        })

    except Exception as exc:
        # API boundary: surface any failure as a 500 with its message.
        return jsonify({'error': str(exc)}), 500

@app.route('/health', methods=['GET'])
def health_check():
    """GET /health — liveness probe for the translation service."""
    return jsonify({'status': 'healthy', 'model': 'mT5-translation'})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=False)

Advanced Techniques and Optimizations

Improve model performance with advanced training strategies and architectural modifications.

Multi-GPU Training

# Distributed training setup
from torch.nn.parallel import DistributedDataParallel
from accelerate import Accelerator

def setup_distributed_training():
    """Configure multi-GPU training via 🤗 Accelerate.

    Returns an ``(accelerator, training_args)`` pair where the new
    TrainingArguments inherit the existing module-level ``training_args``
    and override the distributed-specific fields.
    """
    accelerator = Accelerator()

    # BUG FIX: the original passed ``**training_args.__dict__`` alongside
    # explicit keyword arguments, which raises
    # "TypeError: got multiple values for argument" for any field present
    # in both (e.g. output_dir). Merge into one dict so the distributed
    # overrides win cleanly.
    args_dict = dict(training_args.to_dict())
    args_dict.update(
        output_dir="./mt5-distributed",
        per_device_train_batch_size=4,   # smaller batch per GPU
        gradient_accumulation_steps=4,   # effective batch = 4*4*num_gpus
        dataloader_pin_memory=True,
        ddp_find_unused_parameters=False,
    )
    distributed_args = TrainingArguments(**args_dict)

    return accelerator, distributed_args

Curriculum Learning

def create_curriculum_dataset(dataset, difficulty_fn, stages=3):
    """Split *dataset* into easy-to-hard stages for curriculum learning.

    *difficulty_fn* scores each example; examples are ordered by score
    and partitioned into *stages* roughly equal slices (the final stage
    absorbs any remainder). Returns a list of dataset stages.
    """
    # Score every example, then order indices from easiest to hardest.
    scores = [difficulty_fn(example) for example in dataset]
    easy_to_hard = np.argsort(scores)

    per_stage = len(dataset) // stages
    curriculum = []
    for stage in range(stages):
        lo = stage * per_stage
        # The last stage runs to the end so no example is dropped.
        hi = (stage + 1) * per_stage if stage < stages - 1 else len(dataset)
        curriculum.append(dataset.select(easy_to_hard[lo:hi]))

    return curriculum

def sentence_difficulty(example):
    """Rough difficulty proxy: word count of the longer sentence side."""
    en_words = example['translation']['en'].split()
    fr_words = example['translation']['fr'].split()
    return max(len(en_words), len(fr_words))

Common Issues and Troubleshooting

Building translation models involves several potential pitfalls. Here are solutions to common problems.

Memory Management

# Handle CUDA out of memory errors
def handle_memory_issues():
    """Apply the standard knobs for taming CUDA out-of-memory errors.

    Mutates the module-level ``training_args`` in place.
    """
    # Release cached allocations held by the CUDA allocator.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Shrink the per-device batch and compensate with accumulation so
    # the effective batch size is unchanged.
    training_args.per_device_train_batch_size = 4
    training_args.gradient_accumulation_steps = 4

    # Trade compute for memory: recompute activations in backward.
    training_args.gradient_checkpointing = True

    # Mixed precision roughly halves activation memory.
    training_args.fp16 = True

    print("Applied memory optimization settings")

# Monitor GPU memory usage
def monitor_gpu_memory():
    """Print current GPU memory usage (no-op when CUDA is unavailable)."""
    if not torch.cuda.is_available():
        return

    allocated = torch.cuda.memory_allocated() / 1024**3
    cached = torch.cuda.memory_reserved() / 1024**3
    print(f"GPU Memory - Allocated: {allocated:.2f}GB, Cached: {cached:.2f}GB")

Model Performance Issues

# Debug poor translation quality
def debug_model_performance(model, tokenizer, problem_examples):
    """Print predictions and tokenizations for known-problematic inputs.

    Each item in *problem_examples* is a {'source': ..., 'target': ...}
    dict; seeing the subword splits often explains odd translations.
    """
    for case in problem_examples:
        source, expected = case['source'], case['target']

        # Model output for this source sentence.
        prediction = translate_text(model, tokenizer, source)

        # Tokenize both sides as the model would see them.
        source_tokens = tokenizer.tokenize(f"translate en to fr: {source}")
        target_tokens = tokenizer.tokenize(expected)

        print(f"Source: {source}")
        print(f"Expected: {expected}")
        print(f"Predicted: {prediction}")
        print(f"Source tokens ({len(source_tokens)}): {source_tokens}")
        print(f"Target tokens ({len(target_tokens)}): {target_tokens}")
        print("-" * 80)

# Example problematic cases
problem_cases = [
    {"source": "Bank", "target": "Banque"},  # Ambiguous word
    {"source": "The bank is closed", "target": "La banque est fermée"},
    {"source": "I bank on you", "target": "Je compte sur toi"}
]

debug_model_performance(model, tokenizer, problem_cases)

Comparison with Other Translation Approaches

Understanding mT5's position in the translation landscape helps you make informed decisions.

mT5 vs Traditional Statistical Methods

Statistical Machine Translation (SMT) relies on phrase tables and language models. These systems require extensive parallel corpora and struggle with long-range dependencies.

mT5 Advantages:

  • Handles context better through attention mechanisms
  • Requires less manual feature engineering
  • Transfers knowledge across languages
  • Adapts to domain-specific terminology through fine-tuning

mT5 vs Other Neural Approaches

Sequence-to-Sequence Models with LSTM/GRU architectures preceded transformers. They suffer from vanishing gradients and limited context windows.

BERT-based Translation uses encoder-only architecture. This approach requires additional decoder components and complex training procedures.

mT5 Benefits:

  • Unified text-to-text framework
  • Pre-trained on massive multilingual data
  • Consistent performance across language pairs
  • Simpler fine-tuning process

Performance Benchmarks and Results

Real-world performance data helps set expectations for your mT5 translation models.

BLEU Score Expectations

| Language Pair | mT5-Small | mT5-Base | mT5-Large |
|---------------|-----------|----------|-----------|
| EN-FR         | 28.5      | 32.1     | 35.7      |
| EN-DE         | 25.2      | 28.9     | 32.4      |
| EN-ES         | 31.8      | 35.2     | 38.6      |
| EN-ZH         | 22.1      | 25.7     | 29.3      |

Training Time and Resource Requirements

| Model Size | Parameters | Training Time | GPU Memory | Inference Speed |
|------------|------------|---------------|------------|-----------------|
| mT5-Small  | 300M       | 2-4 hours     | 8GB        | 50 tokens/sec   |
| mT5-Base   | 580M       | 6-8 hours     | 16GB       | 35 tokens/sec   |
| mT5-Large  | 1.2B       | 12-16 hours   | 32GB       | 20 tokens/sec   |

Benchmarks based on 10k training samples, NVIDIA V100 GPU

Future Improvements and Extensions

Your mT5 translation model can grow more sophisticated with additional techniques.

Multilingual Extensions

# Support multiple language pairs in one model
def create_multilingual_dataset(language_pairs):
    """Merge several language-pair corpora into one tagged dataset.

    *language_pairs* is an iterable of (source_lang, target_lang) tuples;
    every example is tagged with its pair so one model can serve them all.
    """
    combined = []

    for source_lang, target_lang in language_pairs:
        pair_data = load_translation_data(f"{source_lang}-{target_lang}")

        # Annotate each example with its direction before merging.
        for example in pair_data:
            example['source_lang'] = source_lang
            example['target_lang'] = target_lang
            combined.append(example)

    return Dataset.from_list(combined)

# Create multilingual training data
language_pairs = [("en", "fr"), ("en", "de"), ("en", "es"), ("fr", "de")]
multilingual_data = create_multilingual_dataset(language_pairs)

Domain Adaptation

# Fine-tune for specific domains
def create_domain_specific_data(domain="medical"):
    """Placeholder: load a parallel corpus for the given *domain*.

    Implementation depends on your data sources; currently returns None.
    """
    # Map domain names to (hypothetical) corpus identifiers.
    domain_datasets = {
        "medical": "medical_translation_corpus",
        "legal": "legal_translation_corpus",
        "technical": "technical_translation_corpus",
    }

    # TODO: fetch domain_datasets[domain] once a data source is wired up.
    return None

# Gradual domain adaptation
def gradual_domain_adaptation(model, general_data, domain_data, steps=3):
    """Placeholder: three-phase domain-adaptation schedule.

    Intended phases: (1) train on general data only, (2) mix general and
    domain data 80:20, (3) shift the mix to 20:80. Not yet implemented;
    returns None.
    """
    return None

Conclusion

Building translation models with mT5 transforms complex multilingual challenges into manageable engineering tasks. You've learned to prepare datasets, fine-tune models, evaluate performance, and deploy production systems.

Key takeaways from this guide:

Start Small: Use mT5-small for prototyping. Scale up to larger models once you validate your approach.

Data Quality Matters: Clean, diverse training data produces better translations than large volumes of noisy text.

Evaluation is Critical: BLEU scores provide baselines, but human evaluation reveals real-world quality.

Optimize for Production: Model compression, caching, and hardware acceleration make deployment viable.

The mT5 architecture handles 100+ languages with consistent quality. Your translation models can now bridge communication gaps across global audiences.

Ready to build your first mT5 translator? Start with the environment setup and work through each section. The code examples provide working implementations you can adapt to your specific needs.

Next Steps: Experiment with different language pairs, explore domain adaptation techniques, and consider implementing real-time translation APIs for your applications.

Source: markaicode.com