diff --git a/data/eufondy/main.py b/data/eufondy/main.py index e9e06e9..6fa017b 100644 --- a/data/eufondy/main.py +++ b/data/eufondy/main.py @@ -9,12 +9,16 @@ import json import os import re +from functools import partial from itertools import zip_longest from urllib.request import urlopen from lxml.etree import iterparse from openpyxl import load_workbook +MS_2014_URL = "https://ms14opendata.mssf.cz/SeznamProjektu.xml" +MS_2021_URL = "https://ms21opendata.mssf.cz/SeznamOperaci_21_27.xml" + # TODO: nechcem strptime? def predatuj(s): @@ -274,7 +278,7 @@ def prehled_2007_2013(outdir: str, partial: bool = False): cw.writerow(dt) -def opendata_2014_2020(outdir: str, partial: bool = False): +def opendata_xml(url: str, fn: str, outdir: str, partial: bool = False): sloupce = [ "id", "id_vyzva", @@ -303,12 +307,10 @@ def opendata_2014_2020(outdir: str, partial: bool = False): "cilove_skupiny", ] - with open( - os.path.join(outdir, "opendata_2014_2020.csv"), "w", encoding="utf8" - ) as fw: + with open(os.path.join(outdir, fn), "w", encoding="utf8") as fw: cw = csv.DictWriter(fw, fieldnames=sloupce, lineterminator="\n") cw.writeheader() - r = urlopen("https://ms14opendata.mssf.cz/SeznamProjektu.xml", timeout=300) + r = urlopen(url, timeout=300) et = iterparse(r) for j, (action, element) in enumerate(et): @@ -323,12 +325,19 @@ def opendata_2014_2020(outdir: str, partial: bool = False): element.clear() +opendata_2014_2020 = partial(opendata_xml, MS_2014_URL, "2014_2020.csv") +opendata_2021_2027 = partial(opendata_xml, MS_2021_URL, "2021_2027.csv") + + # neimplementujem `partial`, protoze tech dat stejne neni moc def main(outdir: str, partial: bool = False): prehled_2007_2013(outdir, partial) prehled_2014_2020(outdir, partial) prehled_2021_2027(outdir, partial) - opendata_2014_2020(outdir, partial) + od_outdir = os.path.join(outdir, "opendata") + os.makedirs(od_outdir, exist_ok=True) + opendata_2014_2020(od_outdir, partial) + opendata_2021_2027(od_outdir, partial) if __name__ == "__main__": diff --git a/data/eufondy/schema.py b/data/eufondy/schema.py index 41266b1..2fe6145 100644 --- a/data/eufondy/schema.py +++ b/data/eufondy/schema.py @@ -5,7 +5,7 @@ schema = [ Table( - "opendata_2014_2020", + "opendata", meta, Column("id", BigInteger, nullable=False), Column("id_vyzva", BigInteger, nullable=False), @@ -16,7 +16,7 @@ Column("problem", Text, nullable=True), Column("cil", Text, nullable=True), Column("datum_zahajeni", Date, nullable=True), - Column("datum_ukonceni_predp", Date, nullable=False), + Column("datum_ukonceni_predp", Date, nullable=True), Column("datum_ukonceni_skut", Date, nullable=True), Column("suk", Text, nullable=False), Column("zadatel_nazev", Text, nullable=False),