parsing one entry
This commit is contained in:
47
scraper.py
47
scraper.py
@@ -68,8 +68,7 @@ def get_obchodneMeno(soup):
|
||||
data = {}
|
||||
|
||||
# find the table <tr> element of "Obchodné meno:"
|
||||
#meno_tr = soup.find("span", class_="tl", string=re.compile("Obchodné")).parent.parent
|
||||
meno_tr = get_data_td(soup, "Obchodné")
|
||||
meno_tr = soup.find("span", class_="tl", string=re.compile("Obchodné")).parent.parent
|
||||
|
||||
# parse the name and date
|
||||
active = meno_tr.find_all("span", class_="ra")
|
||||
@@ -96,7 +95,6 @@ def get_sidlo(soup):
|
||||
data = {}
|
||||
|
||||
# find the table <tr> element of "Sídlo:"
|
||||
sidlo_tr = get_data_td(soup, "Sídlo")
|
||||
return data
|
||||
|
||||
|
||||
@@ -166,12 +164,51 @@ def get_vypisUdajov(soup):
|
||||
return data
|
||||
|
||||
|
||||
def get_data(soup, name):
|
||||
def process_entry(entry, value_type):
    """
    Extract one entry from the table of entries for a given data field.

    :param entry: one table element of data; it is expected to contain
        exactly two ``<td>`` cells (value cell first, validity cell second)
    :param value_type: type of the value data (currently not used here)
    :return: tuple: (value, valid_from, valid_until, active)

    A malformed entry (not exactly two cells) yields
    ``(None, None, None, False)`` instead of raising.
    """
    value, valid_from, valid_until, active = None, None, None, False

    cells = entry.find_all("td")
    if len(cells) != 2:
        # Unexpected layout: report "nothing parsed" rather than failing on
        # the tuple unpacking below.
        return value, valid_from, valid_until, active

    value_td, _valid_td = cells

    # Check if active entry: the first CSS class of the value cell's first
    # <span> is "ra" for active entries.
    span = value_td.span
    classes = span.attrs.get("class") if span is not None else None
    if classes and classes[0] == "ra":
        active = True

    # NOTE(review): value, valid_from and valid_until are not extracted yet,
    # so they are always returned as None - presumably work in progress;
    # confirm the intended parsing of both cells before relying on them.
    return value, valid_from, valid_until, active
|
||||
|
||||
|
||||
def get_data(soup, name, value_type="text", allow_multiple_active=True):
    """
    Collect the active and historical values of one labelled data field.

    The label is the first ``<span class="tl">`` whose string matches the
    regular expression ``name``; its entries are the ``<table>`` elements
    inside the ``<td>`` that follows the label's parent.

    :param soup: parsed document (or sub-tree) to search
    :param name: regular expression matched against the label text
    :param value_type: forwarded to ``process_entry`` for every entry
    :param allow_multiple_active: when true the result holds a ``"values"``
        list of all active entries; when false the first active entry's
        ``value`` / ``valid_from`` / ``valid_until`` are merged into the
        result directly (all ``None`` if there is no active entry)
    :return: dict with the active value(s) plus an ``"old_values"`` list;
        both lists are empty when the label or its data cell is missing
    """
    values = []
    old_values = []

    # Tolerate a missing label / data cell instead of failing on None.
    label = soup.find("span", class_="tl", string=re.compile(name))
    data_td = label.parent.find_next_sibling("td") if label is not None else None
    entries = data_td.find_all("table") if data_td is not None else []

    for entry in entries:
        value, valid_from, valid_until, active = process_entry(entry, value_type)
        if value is None:
            # Entry could not be parsed; skip it.
            continue
        record = {"value": value, "valid_from": valid_from, "valid_until": valid_until}
        if active:
            values.append(record)
        else:
            old_values.append(record)

    data = {}
    if allow_multiple_active:
        data["values"] = values
    elif values:
        data.update(values[0])
    else:
        # No active entry: keep the result shape stable rather than raising
        # IndexError on values[0].
        data.update({"value": None, "valid_from": None, "valid_until": None})
    data["old_values"] = old_values

    return data
|
||||
|
||||
|
||||
def parse_oddo(text):
|
||||
|
||||
Reference in New Issue
Block a user