From a0a1691b2784ecc32eaa22acea4be2ea6c0e8373 Mon Sep 17 00:00:00 2001 From: Ondrej Kokes Date: Fri, 15 Nov 2024 10:51:13 +0100 Subject: [PATCH] od pro 2021-2027 --- data/eufondy/main.py | 14 ++++++++++---- data/eufondy/schema.py | 4 ++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/data/eufondy/main.py b/data/eufondy/main.py index e9e06e9..d25026f 100644 --- a/data/eufondy/main.py +++ b/data/eufondy/main.py @@ -11,6 +11,7 @@ import re from itertools import zip_longest from urllib.request import urlopen +from functools import partial from lxml.etree import iterparse from openpyxl import load_workbook @@ -274,7 +275,7 @@ def prehled_2007_2013(outdir: str, partial: bool = False): cw.writerow(dt) -def opendata_2014_2020(outdir: str, partial: bool = False): +def opendata_xml(url: str, fn: str, outdir: str, partial: bool = False): sloupce = [ "id", "id_vyzva", @@ -304,11 +305,11 @@ def opendata_2014_2020(outdir: str, partial: bool = False): ] with open( - os.path.join(outdir, "opendata_2014_2020.csv"), "w", encoding="utf8" + os.path.join(outdir, fn), "w", encoding="utf8" ) as fw: cw = csv.DictWriter(fw, fieldnames=sloupce, lineterminator="\n") cw.writeheader() - r = urlopen("https://ms14opendata.mssf.cz/SeznamProjektu.xml", timeout=300) + r = urlopen(url, timeout=300) et = iterparse(r) for j, (action, element) in enumerate(et): @@ -322,13 +323,18 @@ def opendata_2014_2020(outdir: str, partial: bool = False): cw.writerow(projekt) element.clear() +opendata_2014_2020 = partial(opendata_xml, "https://ms14opendata.mssf.cz/SeznamProjektu.xml", "2014_2020.csv") +opendata_2021_2027 = partial(opendata_xml, "https://ms21opendata.mssf.cz/SeznamOperaci_21_27.xml", "2021_2027.csv") # neimplementujem `partial`, protoze tech dat stejne neni moc def main(outdir: str, partial: bool = False): prehled_2007_2013(outdir, partial) prehled_2014_2020(outdir, partial) prehled_2021_2027(outdir, partial) - opendata_2014_2020(outdir, partial) + od_outdir = os.path.join(outdir, "opendata") + os.makedirs(od_outdir, exist_ok=True) + opendata_2014_2020(od_outdir, partial) + opendata_2021_2027(od_outdir, partial) if __name__ == "__main__": diff --git a/data/eufondy/schema.py b/data/eufondy/schema.py index 41266b1..2fe6145 100644 --- a/data/eufondy/schema.py +++ b/data/eufondy/schema.py @@ -5,7 +5,7 @@ schema = [ Table( - "opendata_2014_2020", + "opendata", meta, Column("id", BigInteger, nullable=False), Column("id_vyzva", BigInteger, nullable=False), @@ -16,7 +16,7 @@ Column("problem", Text, nullable=True), Column("cil", Text, nullable=True), Column("datum_zahajeni", Date, nullable=True), - Column("datum_ukonceni_predp", Date, nullable=False), + Column("datum_ukonceni_predp", Date, nullable=True), Column("datum_ukonceni_skut", Date, nullable=True), Column("suk", Text, nullable=False), Column("zadatel_nazev", Text, nullable=False),