diff --git a/scraper.py b/scraper.py
index cb18cb6..ac0d244 100644
--- a/scraper.py
+++ b/scraper.py
@@ -68,7 +68,8 @@ def get_obchodneMeno(soup):
data = {}
# find the table element of "Obchodné meno:"
- meno_tr = soup.find("span", class_="tl", string=re.compile("Obchodné")).parent.parent
+ #meno_tr = soup.find("span", class_="tl", string=re.compile("Obchodné")).parent.parent
+ meno_tr = get_data_td(soup, "Obchodné")
# parse the name and date
active = meno_tr.find_all("span", class_="ra")
@@ -91,6 +92,88 @@ def get_obchodneMeno(soup):
return data
+def get_sidlo(soup):
+ data = {}
+
+ # find the table element of "Sídlo:"
+ sidlo_tr = get_data_td(soup, "Sídlo")
+ return data
+
+
+def get_ico(soup):
+ data = {}
+
+ return data
+
+
+def get_denZapisu(soup):
+ data = {}
+
+ return data
+
+
+def get_pravnaForma(soup):
+ data = {}
+
+ return data
+
+
+def get_predmetyCinnosti(soup):
+ data = {}
+
+ return data
+
+
+def get_spolocnici(soup):
+ data = {}
+
+ return data
+
+
+def get_vyskaVkladov(soup):
+ data = {}
+
+ return data
+
+
+def get_statutarnyOrgan(soup):
+ data = {}
+
+ return data
+
+
+def get_konanie(soup):
+ data = {}
+
+ return data
+
+
+def get_zakladneImanie(soup):
+ data = {}
+
+ return data
+
+
+def get_aktualizaciaUdajov(soup):
+ data = {}
+
+ return data
+
+
+def get_vypisUdajov(soup):
+ data = {}
+
+ return data
+
+
+def get_data(soup, name):
+ data_td = soup.find("span", class_="tl", string=re.compile(f"{name}")).parent.find_next_sibling("td")
+
+
+ return []
+
+
+
def parse_oddo(text):
"""
Parses the valid_from and valid_until from string
@@ -106,29 +189,30 @@ def parse_oddo(text):
return valid_from, valid_until
+
def test():
- url = "https://www.orsr.sk/vypis.asp?ID=670947&SID=2&P=1"
+ url = "https://www.orsr.sk/vypis.asp?ID=12388&SID=8&P=1"
html = requests.get(url)
soup = BeautifulSoup(html.content, "html.parser")
record = {
- "oddiel": get_oddiel(soup),
- "vlozka": get_vlozka(soup),
- "obchodneMeno": get_obchodneMeno(soup),
- "sidlo": "",
- "ico": "",
- "denZapisu": "",
- "pravnaForma": "",
- "predmetyCinnosti": "",
- "spolocnici": "",
- "vyskaVkladov": "",
- "statutarnyOrgan": "",
- "konanie": "",
- "zakladneImanie": "",
- "aktualizaciaUdajov": "",
- "vypisUdajov": ""
+ "oddiel": get_oddiel(soup),
+ "vlozka": get_vlozka(soup),
+ "obchodneMeno": get_obchodneMeno(soup),
+ "sidlo": get_sidlo(soup),
+ "ico": get_ico(soup),
+ "denZapisu": get_denZapisu(soup),
+ "pravnaForma": get_pravnaForma(soup),
+ "predmetyCinnosti": get_predmetyCinnosti(soup),
+ "spolocnici": get_spolocnici(soup),
+ "vyskaVkladov": get_vyskaVkladov(soup),
+ "statutarnyOrgan": get_statutarnyOrgan(soup),
+ "konanie": get_konanie(soup),
+ "zakladneImanie": get_zakladneImanie(soup),
+ "aktualizaciaUdajov": get_aktualizaciaUdajov(soup),
+ "vypisUdajov": get_vypisUdajov(soup)
}
- print(json.dumps(record,indent=4))
+ print(json.dumps(record,indent=4,ensure_ascii=False))
collection = connect_db()
#collection.bulk_write(soup)
disconnect_db(collection)