Friday, October 11, 2024

Python web scraper for tr-ex.me

from urllib.parse import quote

import requests
from bs4 import BeautifulSoup


user_agent = 'Mozilla/5 (Solaris 10) Gecko'
headers = {'User-Agent': user_agent}

# Default search term used when the script is run directly.
word = 'masă'

# Example of a full search URL with its query parameters, kept for reference:
# https://tr-ex.me/translation/romanian-english/casă?p=1&page=1&tm=ptable_exact&translation=&h=110d823af35d34cae60fd423dd67762a&target_filter
BASE_URL = 'https://tr-ex.me/translation/romanian-english/'

# Seconds to wait for the server; without a timeout requests.get can block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def build_url(term):
    """Return the tr-ex.me Romanian-English lookup URL for *term*.

    The term is percent-encoded (UTF-8) so that spaces, non-ASCII letters
    and reserved characters such as '/', '?' or '#' stay inside the path
    segment instead of altering the URL structure.
    """
    return BASE_URL + quote(term, safe='')


def fetch_page(term):
    """Download the lookup page for *term* and return it parsed.

    Returns a BeautifulSoup tree built with the 'html.parser' backend.
    Raises requests.RequestException (including Timeout) on network errors.
    """
    # NOTE(review): the HTTP status is deliberately not checked here, as in
    # the original script; the "not found" page may be served with an error
    # status and is detected from its markup instead - confirm.
    response = requests.get(
        build_url(term), headers=headers, timeout=REQUEST_TIMEOUT
    )
    return BeautifulSoup(response.text, 'html.parser')


def extract_translations(pool):
    """Return the translation texts found in the parsed page *pool*.

    Looks inside the 'translations-wrapper' div for 'translation-wrapper'
    entries and collects the text of each entry's
    'a.translation > span.text' element. Entries missing either element
    are skipped instead of raising AttributeError.
    """
    container = pool.find('div', attrs={'class': 'translations-wrapper'})
    if container is None:
        return []

    texts = []
    for entry in container.find_all('div', attrs={'class': 'translation-wrapper'}):
        link = entry.find('a', attrs={'class': 'translation'})
        if link is None:
            continue
        label = link.find('span', attrs={'class': 'text'})
        if label is not None:
            texts.append(label.text)
    return texts


def extract_related_phrases(pool):
    """Return the text of every 'context-example' link in the page *pool*.

    Only links nested in a 'context-examples' div are considered.
    """
    phrases = []
    for group in pool.find_all('div', attrs={'class': 'context-examples'}):
        for example in group.find_all('a', attrs={'class': 'context-example'}):
            phrases.append(example.text)
    return phrases


def main(term=word):
    """Look up *term* on tr-ex.me and print translations and related phrases.

    Exits with an error message if the page cannot be downloaded.
    """
    try:
        pool = fetch_page(term)
    except requests.RequestException as err:
        raise SystemExit(f"Request to tr-ex.me failed: {err}") from err

    # The site marks an unknown word with this span.
    if pool.find('span', attrs={'class': 'context-not-found-text'}) is not None:
        print("Word not found in trex!")
        return

    print("Automatic word extractions:")
    for translation in extract_translations(pool):
        print(translation)

    # NOTE(review): the original indentation was lost; this section is
    # assumed to belong to the "word found" branch - confirm.
    print("Related 2-word phrases")
    for phrase in extract_related_phrases(pool):
        print(phrase)


if __name__ == "__main__":
    main()