From 7947fc041670c9c18378ec48e5084d54ae4fffb2 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 29 Jul 2024 07:27:12 +0200 Subject: [PATCH 1/6] [ares] statutar nemusi mit nazev --- data/ares/main.py | 4 +++- data/ares/schema.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/data/ares/main.py b/data/ares/main.py index 1564579..c6e93d3 100644 --- a/data/ares/main.py +++ b/data/ares/main.py @@ -32,7 +32,9 @@ def obj(root): def organi(root, ico, nsmap): - nazev = root.find("./are:Nazev", namespaces=nsmap).text + # ICO 1112 ma prazdny nazev statutaru + nazev_el = root.find("./are:Nazev", namespaces=nsmap) + nazev = nazev_el.text if nazev_el is not None else None fosoby, posoby = [], [] for cl in root.findall("./are:Clen", namespaces=nsmap): diff --git a/data/ares/schema.py b/data/ares/schema.py index 12cffdc..820ed1a 100644 --- a/data/ares/schema.py +++ b/data/ares/schema.py @@ -38,7 +38,7 @@ "posoby", meta, Column("ico", Integer, nullable=False, index=True), - Column("nazev_organu", Text, nullable=False), + Column("nazev_organu", Text, nullable=True), Column("datum_zapisu", Date, nullable=False), Column("datum_vymazu", Date, nullable=True), Column("nazev_funkce", Text, nullable=True), From 7826f914604d3979dd5f9f83b047c2b2068bc4fb Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 29 Jul 2024 07:53:57 +0200 Subject: [PATCH 2/6] datum a cas vypisu uz nejsou --- data/ares/main.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/data/ares/main.py b/data/ares/main.py index c6e93d3..6ed1fd7 100644 --- a/data/ares/main.py +++ b/data/ares/main.py @@ -97,8 +97,6 @@ def main(outdir: str, partial: bool = False): cols = [ "zdroj", "aktualizace_db", - "datum_vypisu", - "cas_vypisu", "typ_vypisu", "rejstrik", "ico", @@ -160,8 +158,6 @@ def main(outdir: str, partial: bool = False): uvod = vypis.find("./are:Uvod", namespaces=et.nsmap) uvod_cols = [ "Aktualizace_DB", - "Datum_vypisu", - "Cas_vypisu", "Typ_vypisu", ] From 32bc3797009be2b7daa0484b351e4bc41df96700 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 29 Jul 2024 08:51:42 +0200 Subject: [PATCH 3/6] nedokonci cteni --- data/ares/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/data/ares/main.py b/data/ares/main.py index 6ed1fd7..3f66aa0 100644 --- a/data/ares/main.py +++ b/data/ares/main.py @@ -73,12 +73,16 @@ def remote_data(partial): # pri castecnym loadu stahni jen megabyte if partial: with urlopen(BASE_URL) as r, open(tfn, "wb") as fw: + # v ARES je ted bug, kdy to obcas vraci naprostej garbage + # print(r.headers.__dict__["_headers"]) fw.write(r.read(1000_000)) else: urlretrieve(BASE_URL, tfn) with tarfile.open(tfn, "r:gz") as tf: try: - for el in tf: + for j, el in enumerate(tf): + if partial and j > 100: + break yield (el, tf.extractfile(el).read()) except EOFError: if partial: From 5391289d6190cb719c634e26ec516e1eb46178f1 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 29 Jul 2024 08:58:09 +0200 Subject: [PATCH 4/6] schema update --- data/ares/schema.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/data/ares/schema.py b/data/ares/schema.py index 820ed1a..c91545e 100644 --- a/data/ares/schema.py +++ b/data/ares/schema.py @@ -9,8 +9,6 @@ meta, Column("zdroj", Text, nullable=False), Column("aktualizace_db", Date, nullable=False), - Column("datum_vypisu", Date, nullable=False), - Column("cas_vypisu", Time, nullable=False), Column("typ_vypisu", Text, nullable=False), Column("rejstrik", Text, nullable=True), Column("ico", Integer, nullable=False, primary_key=True, autoincrement=False), From 32d01814ba6017833210b579e60b05426f0b7d97 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 29 Jul 2024 09:00:55 +0200 Subject: [PATCH 5/6] unused --- data/ares/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/ares/schema.py b/data/ares/schema.py index c91545e..13008df 100644 --- a/data/ares/schema.py +++ b/data/ares/schema.py @@ -1,5 +1,5 @@ from sqlalchemy import Column, MetaData, Table -from sqlalchemy.sql.sqltypes import JSON, Date, Integer, Text, Time +from sqlalchemy.sql.sqltypes import JSON, Date, Integer, Text meta = MetaData() From 265ca4eb5ea9495e0df7305811657751a396f99b Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Mon, 29 Jul 2024 09:10:36 +0200 Subject: [PATCH 6/6] nullable organ fosoby --- data/ares/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/ares/schema.py b/data/ares/schema.py index 13008df..2ecc19e 100644 --- a/data/ares/schema.py +++ b/data/ares/schema.py @@ -21,7 +21,7 @@ "fosoby", meta, Column("ico", Integer, nullable=False, index=True), - Column("nazev_organu", Text, nullable=False), + Column("nazev_organu", Text, nullable=True), Column("datum_zapisu", Date, nullable=False), Column("datum_vymazu", Date, nullable=True), Column("nazev_funkce", Text, nullable=True),