import requests
import re
from bs4 import BeautifulSoup
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor
from app.config import settings
from app.db import connect_db, disconnect_db
from time import sleep


def scrape_orsr():
    """Scrape the ORSR listing page and fan out record processing to threads.

    Fetches ``settings["orsr_url"]``, collects the ``href`` of every
    "Aktuálny" link, splits those URLs into ``settings["threads"]``
    round-robin partitions and processes each partition in its own
    worker thread.
    """
    html = requests.get(settings["orsr_url"])
    soup = BeautifulSoup(html.content, "html.parser")
    links = soup.find_all("a", string="Aktuálny")
    records = [link["href"] for link in links]
    # NOTE(review): hrefs scraped this way may be relative to orsr_url —
    # verify whether they need to be joined with the base URL before fetching.
    # BUG FIX: partition the record URLs themselves. The original code
    # partitioned a list of 1-based integer indexes (worker_ids) and handed
    # those integers to process_records, which then tried to fetch them
    # as URLs — no record could ever be downloaded.
    n_threads = settings["threads"]
    parts = [records[i::n_threads] for i in range(n_threads)]
    # Size the pool to the configured thread count so every partition
    # actually runs concurrently (the default pool size is unrelated
    # to settings["threads"]).
    with ThreadPoolExecutor(max_workers=n_threads) as pool:
        for thread_id, part in enumerate(parts, start=1):
            pool.submit(process_records, part, thread_id)


def process_records(records, thread):
    """Worker: process a batch of record URLs and persist them in one write.

    :param records: list of record URLs to process
    :param thread: worker id (used only to label the progress bar)
    """
    data = []
    # Iterate the URLs directly instead of indexing via range(len(...)).
    for url in tqdm(records, desc=f"thread {thread}"):
        data.append(process_record(url))
    collection = connect_db()
    # NOTE(review): pymongo's bulk_write expects operation objects such as
    # InsertOne, not plain dicts/None — confirm once process_record
    # actually returns parsed data.
    collection.bulk_write(data)
    disconnect_db(collection)


def process_record(url):
    """Process one record: scrape *url* and return its parsed fields.

    :param url: URL of the record detail page
    :return: dictionary of parameters (field extraction not implemented yet)
    """
    html = requests.get(url)
    soup = BeautifulSoup(html.content, "html.parser")
    # TODO: extract the record fields from *soup* and return them
    # (see the schema sketch in test()).


def test():
    """Ad-hoc manual test against a single known record URL."""
    url = "https://www.orsr.sk/vypis.asp?ID=648444&SID=9&P=0"
    html = requests.get(url)
    soup = BeautifulSoup(html.content, "html.parser")
    # Sketch of the record schema still to be extracted (kept as a string
    # literal on purpose — `pass` placeholders are not valid expressions):
    '''
    record = {
        "oddiel": soup.find("span", string=re.compile("Oddiel:")),
        "vlozka": pass,
        "obchodneMeno": pass,
        "sidlo": pass,
        "ico": pass,
        "denZapisu": pass,
        "pravnaForma": pass,
        "predmetyCinnosti": pass,
        "spolocnici": pass,
        "vyskaVkladov": pass,
        "statutarnyOrgan": pass,
        "konanie": pass,
        "zakladneImanie": pass,
        "aktualizaciaUdajov": pass,
        "vypisUdajov": pass
    }
    '''
    collection = connect_db()
    # collection.bulk_write(soup)  # disabled: soup is not a write operation
    disconnect_db(collection)


if __name__ == "__main__":
    # scrape_orsr()
    test()