from bs4 import BeautifulSoup
# XML data
# One <cG> record: an id <c>, two record-level <trG> transaction groups, and
# one <lG> language group per language. Adjacent literals are concatenated by
# the compiler, so the resulting string has no whitespace between elements.
xml_data = (
    '<cG><c>1</c>'
    '<trG><tr type="origination">letco</tr><dt>2020-08-30T19:12:50</dt></trG>'
    '<trG><tr type="modification">letco</tr><dt>2020-08-30T19:12:58</dt></trG>'
    '<lG><l lang="DE" type="German"/>'
    '<tG><t>Pelletpresse</t>'
    '<trG><tr type="origination">letco</tr><dt>2020-08-30T19:12:50</dt></trG>'
    '<trG><tr type="modification">letco</tr><dt>2020-08-30T19:12:50</dt></trG>'
    '</tG></lG>'
    '<lG><l lang="RO" type="Romanian"/>'
    '<tG><t>presă de peleți</t>'
    '<trG><tr type="origination">letco</tr><dt>2020-08-30T19:12:58</dt></trG>'
    '<trG><tr type="modification">letco</tr><dt>2020-08-30T19:12:58</dt></trG>'
    '</tG></lG>'
    '</cG>'
)
# Parse the XML
# The "xml" feature asks Beautiful Soup for an XML parser rather than an HTML one.
soup = BeautifulSoup(markup=xml_data, features="xml")
# Function to extract translation details
def parse_translation(translation_element):
    """Collect the language, term text and transactions of one ``lG`` element.

    Args:
        translation_element: A parsed ``lG`` element. It must contain an ``l``
            element carrying ``lang`` and ``type`` attributes and a ``t``
            element; it may contain any number of ``trG`` groups, each with a
            ``tr`` element (with a ``type`` attribute) and a ``dt`` element.

    Returns:
        A dict with the keys ``"lang"``, ``"type"``, ``"text"`` and
        ``"transactions"``; the last is a list of dicts with the keys
        ``"type"``, ``"actor"`` and ``"datetime"``, in document order.

    Raises:
        AttributeError: If a required ``l``, ``t`` or ``dt`` element is
            missing (``find`` returns ``None``).
        TypeError: If a ``trG`` group has no ``tr`` element.
        KeyError: If a required attribute is missing.
    """
    language = translation_element.find("l").attrs
    term_text = translation_element.find("t").text
    transactions = []
    # find_all() searches all descendants, so groups nested under <tG> are found.
    for group in translation_element.find_all("trG"):
        actor = group.find("tr")  # one lookup serves both "type" and "actor"
        transactions.append(
            {
                "type": actor["type"],
                "actor": actor.text,
                "datetime": group.find("dt").text,
            }
        )
    return {
        "lang": language["lang"],
        "type": language["type"],
        "text": term_text,
        "transactions": transactions,
    }
# Extract the main data
# The BeautifulSoup object's only direct child is the <cG> root element, so a
# non-recursive search for the record-level <trG> groups has to start from
# <cG>; started from ``soup`` itself it matches nothing and the list is empty.
# recursive=False keeps the <trG> groups nested inside <lG>/<tG> out of this list.
data = {
    "c": soup.find("c").text,
    "transactions": [
        {
            "type": tr.find("tr")["type"],
            "actor": tr.find("tr").text,
            "datetime": tr.find("dt").text,
        }
        for tr in soup.find("cG").find_all("trG", recursive=False)
    ],
    "translations": [parse_translation(lG) for lG in soup.find_all("lG")],
}
# Output the extracted data
print(data)
from xml.etree import ElementTree as ET
# Parse the XML
# ET.XML parses a string and returns the document's root element (here <cG>).
root = ET.XML(xml_data)
# Function to extract translation details
def parse_translation(translation_element: ET.Element) -> dict:
    """Collect the language, term text and transactions of one ``lG`` element.

    Args:
        translation_element: An ``lG`` element. It must have a direct ``l``
            child carrying ``lang`` and ``type`` attributes and a ``tG/t``
            descendant; ``tG`` may hold any number of ``trG`` groups, each
            with a ``tr`` child (with a ``type`` attribute) and a ``dt`` child.

    Returns:
        A dict with the keys ``"lang"``, ``"type"``, ``"text"`` and
        ``"transactions"``; the last is a list of dicts with the keys
        ``"type"``, ``"actor"`` and ``"datetime"``, in document order.

    Raises:
        AttributeError: If a required element is missing (``find`` returns
            ``None``).
        KeyError: If a required attribute is missing.
    """
    language = translation_element.find("./l").attrib
    term_text = translation_element.find("./tG/t").text
    transactions = []
    for group in translation_element.findall("./tG/trG"):
        actor = group.find("./tr")  # one lookup serves both "type" and "actor"
        transactions.append(
            {
                "type": actor.attrib["type"],
                "actor": actor.text,
                "datetime": group.find("./dt").text,
            }
        )
    return {
        "lang": language["lang"],
        "type": language["type"],
        "text": term_text,
        "transactions": transactions,
    }
# Extract the main data
# Only direct children of the root are queried here ("./..." paths); the
# transaction groups nested inside each <lG> are handled by parse_translation.
data = {
    "c": root.find("./c").text,
    "transactions": [
        {
            "type": actor.attrib["type"],
            "actor": actor.text,
            "datetime": group.find("./dt").text,
        }
        for group in root.findall("./trG")
        # Single-item loop binds the <tr> child once per group.
        for actor in [group.find("./tr")]
    ],
    "translations": [
        parse_translation(language_group)
        for language_group in root.findall("./lG")
    ],
}
# Output the extracted data
print(data)