From 6b9f47b0a584dc17d7823ba101f219952a9ddf9f Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Tue, 18 Jun 2019 19:31:07 +0200 Subject: [PATCH] [volby] kraje okrsky (#59) --- data/volby/registry/init.sql | 44 +++++++++++++++++++++++++ data/volby/registry/mapping.json | 55 +++++++++++++++++++++++++++++++- data/volby/registry/parse.py | 20 ++++++++---- 3 files changed, 112 insertions(+), 7 deletions(-) diff --git a/data/volby/registry/init.sql b/data/volby/registry/init.sql index 7803107..3a6e769 100644 --- a/data/volby/registry/init.sql +++ b/data/volby/registry/init.sql @@ -302,6 +302,50 @@ create table volby.kraje_strany_kraje ( slozneplat varchar ); +DROP TABLE IF EXISTS volby.kraje_okrsky_prehled; + +CREATE TABLE volby.kraje_okrsky_prehled ( + datum date, + ID_OKRSKY smallint, + TYP_FORM smallint NOT NULL, + OPRAVA smallint NOT NULL, + CHYBA smallint NOT NULL, + OKRES smallint NOT NULL, + OBEC int NOT NULL, + OKRSEK int NOT NULL, + KC_1 int NOT NULL, + VOL_SEZNAM smallint NOT NULL, + VYD_OBALKY smallint NOT NULL, + ODEVZ_OBAL smallint NOT NULL, + PL_HL_CELK smallint NOT NULL, + KC_2 int NOT NULL, + ZAKRSTRANA bit varying(100) NOT NULL +); + +DROP TABLE IF EXISTS volby.kraje_okrsky_hlasy; + +CREATE TABLE volby.kraje_okrsky_hlasy ( + datum date, + ID_OKRSKY smallint, + TYP_FORM smallint NOT NULL, + OPRAVA smallint NOT NULL, + CHYBA smallint NOT NULL, + OKRES smallint NOT NULL, + OBEC int NOT NULL, + OKRSEK int NOT NULL, + KC_1 int NOT NULL, + KSTRANA smallint NOT NULL, + POC_HLASU smallint NOT NULL, + KC_2 int NOT NULL, + KC_3 int NOT NULL, + KC_4 int NOT NULL, + KC_5 int NOT NULL, + POSL_KAND smallint NOT NULL, + KC_SUM int NOT NULL, + STRANY int[] NOT NULL, + HLASY int[] NOT NULL +); + drop table if exists volby.ep_kandidati; create table volby.ep_kandidati ( diff --git a/data/volby/registry/mapping.json b/data/volby/registry/mapping.json index e29a0a6..a98af71 100644 --- a/data/volby/registry/mapping.json +++ b/data/volby/registry/mapping.json @@ -364,7 +364,8 @@ ], "2016-10-07": [ "https://volby.cz/opendata/kz2016/KZ2016reg20161008.zip", - "https://volby.cz/opendata/kz2016/KZ2016ciselniky20161007.zip" + "https://volby.cz/opendata/kz2016/KZ2016ciselniky20161007.zip", + "https://volby.cz/opendata/kz2016/KZ2016data20161008.zip" ] }, "ds": { @@ -444,6 +445,58 @@ "PLATNOST", "SLOZNEPLAT" ] + }, + "okrsky_prehled": { + "fn": [ + "KZT6.dbf", + "kzt6.xml" + ], + "klic": "KZ_T6_ROW", + "schema": [ + "ID_OKRSKY", + "TYP_FORM", + "OPRAVA", + "CHYBA", + "OKRES", + "OBEC", + "OKRSEK", + "KC_1", + "VOL_SEZNAM", + "VYD_OBALKY", + "ODEVZ_OBAL", + "PL_HL_CELK", + "KC_2", + "ZAKRSTRANA" + ] + }, + "okrsky_hlasy": { + "fn": [ + "KZT6p.dbf", + "kzt6p-*.xml" + ], + "klic": "KZ_T6P_ROW", + "schema": [ + "ID_OKRSKY", + "TYP_FORM", + "OPRAVA", + "CHYBA", + "OKRES", + "OBEC", + "OKRSEK", + "KC_1", + "KSTRANA", + "POC_HLASU", + "KC_2", + "KC_3", + "KC_4", + "KC_5", + "POSL_KAND", + "KC_SUM" + ], + "extra_schema": [ + "STRANY", + "HLASY" + ] } } }, diff --git a/data/volby/registry/parse.py b/data/volby/registry/parse.py index 431638d..185f595 100644 --- a/data/volby/registry/parse.py +++ b/data/volby/registry/parse.py @@ -3,7 +3,9 @@ import json import os import shutil +import sys import zipfile +from fnmatch import fnmatch from urllib.request import urlopen, Request from contextlib import contextmanager from tempfile import NamedTemporaryFile @@ -55,6 +57,7 @@ def extract_elements(zf, fn, nodename): qq = [] sch=[] for volby, mp in mps.items(): + if len(sys.argv) > 1 and volby != sys.argv[1]: continue print(volby) csv_dir = f'data/csv/{volby}' os.makedirs(csv_dir, exist_ok=True) @@ -72,14 +75,19 @@ def extract_elements(zf, fn, nodename): for url in urls: with load_remote_data(url) as zf: for ff in map(lambda x: x.filename, zf.filelist): - if ff not in fnmap: continue - ds, fmp = fnmap[ff] + patterns = [j for j in fnmap.keys() if fnmatch(ff, j)] + if len(patterns) == 0: continue + if len(patterns) > 1: + raise KeyError('ambiguous keys: {}'.format(patterns)) + + ds, fmp = fnmap.get(patterns[0]) tfn = os.path.join(csv_dir, f'{datum}_{ds}.csv') qq.append(f"echo {tfn}\ncat {tfn} | psql -c 'copy volby.{volby}_{ds} from stdin csv header'") - if os.path.isfile(tfn): continue # TODO: smaz - with open(tfn, 'w', encoding='utf8') as fw: + fnexists = os.path.isfile(tfn) + with open(tfn, 'a+', encoding='utf8') as fw: cw = csv.DictWriter(fw, fieldnames=['DATUM'] + fmp['schema'] + fmp.get('extra_schema', [])) - cw.writeheader() + if not fnexists: + cw.writeheader() for el in extract_elements(zf, ff, fmp['klic']): for k in fmp.get('vynechej', []): el.pop(k, None) @@ -107,4 +115,4 @@ def extract_elements(zf, fn, nodename): with open('copy.sh', 'w', encoding='utf8') as f: f.write('psql < init.sql\n') - f.write('\n'.join(qq)) + f.write('\n'.join(sorted(list(set(qq)))))