Skip to content

Commit

Permalink
queue (netestovano, justice je dole)
Browse files Browse the repository at this point in the history
  • Loading branch information
kokes committed Jul 20, 2022
1 parent 7ed9492 commit 2b0300e
Showing 1 changed file with 21 additions and 11 deletions.
32 changes: 21 additions & 11 deletions data/justice/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import multiprocessing
import os
import re
from queue import Queue
from urllib.parse import urlparse
from urllib.request import urlopen

Expand Down Expand Up @@ -74,7 +75,7 @@ def nahraj_ds(url):
yield from et


def zpracuj_ds(url, schemas, outdir, partial):
def zpracuj_ds(url, schemas, outdir, partial, autogen):
et = nahraj_ds(url)

fs, csvs, schemasd = dict(), dict(), dict()
Expand Down Expand Up @@ -147,7 +148,7 @@ def zpracuj_ds(url, schemas, outdir, partial):
udaj_typ = udaj_raw.find("udajTyp/kod").text

if udaj_typ not in schemasd:
# TODO(PR): queue na schema_autogen
autogen.put((udaj_typ, udaj_raw))
continue

if not schemasd[udaj_typ].get("ignore", False):
Expand All @@ -171,11 +172,7 @@ def zpracuj_ds(url, schemas, outdir, partial):
podudaj_typ = podudaj_raw.find("udajTyp/kod").text

if podudaj_typ not in schemasd:
# TODO(PR): queue na schema_autogen
# schema_autogen[podudaj_typ] = merge(
# gen_schema(podudaj_raw),
# schema_autogen.get(podudaj_typ, {}),
# )
autogen.put((podudaj_typ, podudaj_raw))
continue

if not schemasd[podudaj_typ].get("ignore", False):
Expand Down Expand Up @@ -248,12 +245,18 @@ def main(outdir: str, partial: bool = False):

urls.append(ds_url[0])

urls = [j for j in urls if "sro" not in j] # TODO(PR): remove
cdir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(cdir, "xml_schema.json"), encoding="utf-8") as f:
schemas = json.load(f)

autogen = Queue()
zpracuj = functools.partial(
zpracuj_ds, schemas=schemas, outdir=outdir, partial=partial
zpracuj_ds,
schemas=schemas,
outdir=outdir,
partial=partial,
autogen=autogen,
)
progress = tqdm(total=len(urls))
# TODO: do we really want to run at full capacity? What if we parameterize the individual modules?
Expand All @@ -267,9 +270,16 @@ def main(outdir: str, partial: bool = False):
# logging.debug(url)?
progress.update(n=1)

# TODO: resolve
# with open('xml_schema_chybejici.json', 'w') as fw:
# json.dump(schema_autogen, fw, indent=2, ensure_ascii=False)
# unprocessed objects need to be reviewed manually
schema_autogen = dict()
while not autogen.empty():
obj, raw = autogen.get()
schema_autogen[obj] = merge(
gen_schema(raw),
schema_autogen.get(obj, {}),
)
with open("xml_schema_chybejici.json", "w") as fw:
json.dump(schema_autogen, fw, indent=2, ensure_ascii=False)


if __name__ == "__main__":
Expand Down

0 comments on commit 2b0300e

Please sign in to comment.