parsing one entry

This commit is contained in:
2023-09-27 18:11:10 +02:00
parent 91fab836bb
commit b34aa10521

View File

@@ -68,8 +68,7 @@ def get_obchodneMeno(soup):
data = {}
# find the table <tr> element of "Obchodné meno:"
#meno_tr = soup.find("span", class_="tl", string=re.compile("Obchodné")).parent.parent
meno_tr = get_data_td(soup, "Obchodné")
meno_tr = soup.find("span", class_="tl", string=re.compile("Obchodné")).parent.parent
# parse the name and date
active = meno_tr.find_all("span", class_="ra")
@@ -96,7 +95,6 @@ def get_sidlo(soup):
data = {}
# find the table <tr> element of "Sídlo:"
sidlo_tr = get_data_td(soup, "Sídlo")
return data
@@ -166,12 +164,51 @@ def get_vypisUdajov(soup):
return data
def get_data(soup, name):
def process_entry(entry, value_type):
    """
    Extract one entry from the table of entries for a given data field.

    Only the ``active`` flag is derived at the moment; ``value``,
    ``valid_from`` and ``valid_until`` are always returned as ``None``
    because their parsing is not implemented in this function yet.

    :param entry: one table element of data; it must contain exactly two
        ``<td>`` cells (presumably the value cell and the validity cell --
        TODO confirm against the page markup)
    :param value_type: type of the value data (accepted but not consulted yet)
    :return: tuple: (value, valid_from, valid_until, active)
    :raises ValueError: if ``entry`` does not contain exactly two ``<td>`` cells
    """
    value, valid_from, valid_until = None, None, None

    # The second cell is not parsed yet; the two-way unpacking is kept so an
    # entry with an unexpected number of cells still fails loudly.
    value_td, _valid_td = entry.find_all("td")

    # An entry is active when the <span> of its first cell carries the CSS
    # class "ra". A missing <span> or a missing "class" attribute means
    # "not active" instead of raising AttributeError / KeyError.
    span = value_td.span
    classes = span.attrs.get("class") if span is not None else None
    if isinstance(classes, str):
        # Some parser configurations keep "class" as one space-separated string.
        classes = classes.split()
    active = "ra" in (classes or ())

    return value, valid_from, valid_until, active
def get_data(soup, name, value_type="text", allow_multiple_active=True):
    """
    Collect the entries listed for one labelled field of the page.

    The field is located through the ``<span class="tl">`` whose text matches
    ``name``; the entries are the ``<table>`` elements inside the ``<td>``
    that follows the label's parent. Each table is handed to
    ``process_entry``; entries for which it reports no value are skipped.

    :param soup: parsed document to search
    :param name: text (used as a regular expression) of the field label
    :param value_type: type of the value data, forwarded to ``process_entry``
    :param allow_multiple_active: if True, all active entries are returned
        under ``"values"``; if False, the first active entry is merged into
        the result as ``"value"``, ``"valid_from"`` and ``"valid_until"``
        (all ``None`` when there is no active entry)
    :return: dict with the active data as described above plus
        ``"old_values"``, the list of non-active entries
    """
    label = soup.find("span", class_="tl", string=re.compile(f"{name}"))
    # A page without this field (or without the data cell next to the label)
    # yields an empty result instead of an AttributeError on None.
    data_td = label.parent.find_next_sibling("td") if label is not None else None
    tables = data_td.find_all("table") if data_td is not None else []

    values = []
    old_values = []
    for entry in tables:
        value, valid_from, valid_until, active = process_entry(entry, value_type)
        if value is None:
            continue
        record = {"value": value, "valid_from": valid_from, "valid_until": valid_until}
        if active:
            values.append(record)
        else:
            old_values.append(record)

    data = {}
    if allow_multiple_active:
        data["values"] = values
    elif values:
        data.update(values[0])
    else:
        # No active entry: keep the result shape stable rather than raising
        # IndexError on values[0].
        data.update({"value": None, "valid_from": None, "valid_until": None})
    data["old_values"] = old_values
    return data
def parse_oddo(text):