parsing one entry
This commit is contained in:
47
scraper.py
47
scraper.py
@@ -68,8 +68,7 @@ def get_obchodneMeno(soup):
|
|||||||
data = {}
|
data = {}
|
||||||
|
|
||||||
# find the table <tr> element of "Obchodné meno:"
|
# find the table <tr> element of "Obchodné meno:"
|
||||||
#meno_tr = soup.find("span", class_="tl", string=re.compile("Obchodné")).parent.parent
|
meno_tr = soup.find("span", class_="tl", string=re.compile("Obchodné")).parent.parent
|
||||||
meno_tr = get_data_td(soup, "Obchodné")
|
|
||||||
|
|
||||||
# parse the name and date
|
# parse the name and date
|
||||||
active = meno_tr.find_all("span", class_="ra")
|
active = meno_tr.find_all("span", class_="ra")
|
||||||
@@ -96,7 +95,6 @@ def get_sidlo(soup):
|
|||||||
data = {}
|
data = {}
|
||||||
|
|
||||||
# find the table <tr> element of "Sídlo:"
|
# find the table <tr> element of "Sídlo:"
|
||||||
sidlo_tr = get_data_td(soup, "Sídlo")
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@@ -166,12 +164,51 @@ def get_vypisUdajov(soup):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
def get_data(soup, name):
|
def process_entry(entry, value_type):
    """
    Extract one entry from the table of entries for a given data field.

    Only the "active" flag is derived so far: ``value``, ``valid_from`` and
    ``valid_until`` are always returned as ``None`` and ``value_type`` is not
    consulted yet.

    :param entry: one table element of data; ``entry.find_all("td")`` must
        yield exactly two cells (value cell, validity cell)
    :param value_type: type of the value data (currently unused)
    :return: tuple: (value, valid_from, valid_until, active)
    :raises ValueError: if ``entry`` does not contain exactly two ``td`` cells
    """
    value, valid_from, valid_until = None, None, None

    # The validity cell is not read yet, but unpacking both cells keeps the
    # two-cell layout check (anything else raises ValueError).
    value_td, _valid_td = entry.find_all("td")

    # Check if active entry: the first CSS class of the value cell's first
    # <span> must be "ra". A missing <span>, a missing "class" attribute or
    # an empty class list simply means "not active" instead of raising.
    span = value_td.span
    classes = span.attrs.get("class") if span is not None else None
    active = bool(classes) and classes[0] == "ra"

    return value, valid_from, valid_until, active
|
||||||
|
|
||||||
|
|
||||||
|
def get_data(soup, name, value_type="text", allow_multiple_active=True):
    """
    Collect the active and historical entries of one labelled data field.

    The label is the first ``<span class="tl">`` whose string matches
    ``name``; the entries are the ``<table>`` elements inside the ``<td>``
    that follows the label's parent. Entries for which ``process_entry``
    returns a ``None`` value are skipped.

    :param soup: parsed document to search
    :param name: regular expression matched against the label text
    :param value_type: type of the value data, passed on to ``process_entry``
    :param allow_multiple_active: if true, active entries are returned as a
        list under ``"values"``; otherwise the first active entry's
        ``"value"``, ``"valid_from"`` and ``"valid_until"`` are merged into
        the result directly (all ``None`` when there is no active entry)
    :return: dict with the active entry/entries and an ``"old_values"`` list
    :raises AttributeError: if no matching label span is found
    """
    label = soup.find("span", class_="tl", string=re.compile(name))
    data_td = label.parent.find_next_sibling("td")

    values = []
    old_values = []

    for entry in data_td.find_all("table"):
        value, valid_from, valid_until, active = process_entry(entry, value_type)
        if value is None:
            continue
        record = {"value": value, "valid_from": valid_from, "valid_until": valid_until}
        (values if active else old_values).append(record)

    data = {}
    if allow_multiple_active:
        data["values"] = values
    elif values:
        data.update(values[0])
    else:
        # No active entry: keep the same keys available instead of failing
        # with IndexError on values[0].
        data.update({"value": None, "valid_from": None, "valid_until": None})
    data["old_values"] = old_values

    return data
|
||||||
|
|
||||||
|
|
||||||
def parse_oddo(text):
|
def parse_oddo(text):
|
||||||
|
|||||||
Reference in New Issue
Block a user