Given the TSV (tab-delimited) file data.csv with this content:
English\tRomanian
Hello my name is John.\tSalut, numele meu e John.
Today is Monday.\tAzi e luni.
import datetime
from xml.sax.saxutils import escape, quoteattr

import pandas as pd
def _seg_text(value):
    """Return *value* as XML-safe text for a <seg> element ("" for a missing cell)."""
    if pd.isna(value):
        return ""
    return escape(str(value))


def tsv_t_tmx(myfile='data.csv', source_lang='en', target_lang='ro', separator='\t',
              source_column='English', target_column='Romanian'):
    """Convert a two-column delimited translation file into a TMX 1.4 document.

    The document is written to ``demofile2.xml`` and an identical copy to
    ``demofile2.tmx``, both in the current working directory and both
    encoded as UTF-8.

    Args:
        myfile: Path of the delimited input file; its first row is the header.
        source_lang: Language code written to ``srclang`` and to the
            ``xml:lang`` of every source ``<tuv>``.
        target_lang: Language code written to the ``xml:lang`` of every
            target ``<tuv>``.
        separator: Field delimiter of the input file (a tab by default).
        source_column: Header name of the column holding the source text.
        target_column: Header name of the column holding the target text.

    Raises:
        FileNotFoundError: If ``myfile`` does not exist.
        KeyError: If ``source_column`` or ``target_column`` is not in the header.
    """
    # NOTE(review): the TMX 1.4 specification appears to recommend the
    # YYYYMMDDThhmmssZ pattern for creationdate; the existing format is kept
    # so the output stays compatible with earlier runs -- confirm before changing.
    created = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Read every cell as text and disable NA detection: otherwise segments such
    # as "NA" or "null" come out as "nan" and numeric-looking text is reformatted.
    df = pd.read_csv(myfile, sep=separator, dtype=str, keep_default_na=False)

    # quoteattr() escapes the value and supplies the surrounding quotes.
    src_attr = quoteattr(str(source_lang))
    tgt_attr = quoteattr(str(target_lang))

    parts = [
        '<?xml version="1.0" encoding="UTF-8" ?>\n'
        '<tmx version="1.4">',
        f'<header creationdate="{created}"\n'
        f'srclang= {src_attr}\n'
        'adminlang="en"\n'
        'o-tmf="unknown"\n'
        'segtype="sentence"\n'
        'creationtool="Python"\n'
        'creationtoolversion="unknown"\n'
        'datatype="PlainText" />\n'
        '<body>\n',
    ]
    for src, tgt in zip(df[source_column], df[target_column]):
        # Segment text is escaped so that &, < and > cannot break the XML.
        parts.append(
            '\n<tu>\n'
            f'<tuv xml:lang={src_attr}>\n'
            f'<seg>{_seg_text(src)}</seg>\n'
            '</tuv>\n'
            f'<tuv xml:lang={tgt_attr}>\n'
            f'<seg>{_seg_text(tgt)}</seg>\n'
            '</tuv>\n'
            '</tu>'
        )
    parts.append('\n</body>\n</tmx>')
    document = "".join(parts)

    base_file = "demofile2.xml"
    # rsplit keeps working even if the base name ever contains extra dots.
    new_file = base_file.rsplit(".", 1)[0] + ".tmx"
    # Write both files from the same in-memory text with an explicit encoding,
    # instead of re-reading the first file with the platform default encoding.
    for path in (base_file, new_file):
        with open(path, "w", encoding="UTF-8") as out:
            out.write(document)


# Script entry point: convert data.csv with the default settings.
if __name__ == "__main__":
    tsv_t_tmx()
*****
TMX model:
<tmx version="1.4"><header creationtool="" creationtoolversion="" segtype="phrase" o-tmf="" adminlang="en" srclang="en" datatype="PlainText" o-encoding="UTF-8" /><body><tu><tuv xml:lang="en"><seg /></tuv></tu><tu><tuv xml:lang="en"><seg /></tuv></tu><tu><tuv xml:lang="en"><seg /></tuv></tu></body></tmx>
*****
<?xml version="1.0" encoding="UTF-8" ?>
<tmx version="1.4"><header creationdate="2023-03-16 20:30:30"
srclang= "en"
adminlang="en"
o-tmf="unknown"
segtype="sentence"
creationtool="Python"
creationtoolversion="3.11"
datatype="PlainText" />
<body>
<tu>
<tuv xml:lang="en">
<seg>Hello my name is John.</seg>
</tuv>
<tuv xml:lang="ro">
<seg>Salut, numele meu e John.</seg>
</tuv>
</tu>
<tu>
<tuv xml:lang="en">
<seg>Today is Monday.</seg>
</tuv>
<tuv xml:lang="ro">
<seg>Azi e luni.</seg>
</tuv>
</tu>
</body>
</tmx>