Skip to content

Commit

Permalink
fix: vérifier présence de zonage
Browse files Browse the repository at this point in the history
  • Loading branch information
dhdaines committed Jul 30, 2024
1 parent e2ac5ad commit 2e420c0
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 6 deletions.
10 changes: 6 additions & 4 deletions alexi/analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from collections import deque
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable, Iterator, NamedTuple, Optional
from typing import Iterable, Iterator, NamedTuple, Optional, Union

from pdfplumber.utils.geometry import T_bbox, calculate_area, merge_bboxes

Expand Down Expand Up @@ -481,7 +481,7 @@ def __call__(
return doc


def extract_zonage(doc: Document) -> dict[str, dict[str, dict[str, str]]]:
def extract_zonage(doc: Document) -> Union[dict[str, dict[str, dict[str, str]]], None]:
"""
Extraire les éléments du zonage d'un règlement et générer des
métadonnées pour l'identification des hyperliens et la
Expand All @@ -490,15 +490,15 @@ def extract_zonage(doc: Document) -> dict[str, dict[str, dict[str, str]]]:
mz: Optional[Element] = None
if "Chapitre" not in doc.paliers:
LOGGER.warning("Aucun chapitre présent dans %s", doc.fileid)
return {}
return None
for c in doc.paliers["Chapitre"]:
if "milieux et zones" in c.titre.lower():
LOGGER.info("Extraction de milieux et zones")
mz = c
break
if mz is None:
LOGGER.info("Chapitre milieux et zones non trouvé")
return {}
return None
top = Path(doc.fileid) / "Chapitre" / mz.numero
metadata: dict[str, dict[str, dict[str, str]]] = {
"categorie_milieu": {},
Expand All @@ -520,4 +520,6 @@ def extract_zonage(doc: Document) -> dict[str, dict[str, dict[str, str]]]:
"titre": m.group(2),
"url": str(subsecdir),
}
if len(metadata["categorie_milieu"]) == 0 and len(metadata["milieu"]) == 0:
return None
return metadata
4 changes: 3 additions & 1 deletion alexi/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,9 @@ def __call__(self, path: Path) -> Union[Document, None]:
if self.pdfdata:
doc.pdfurl = self.pdfdata.get(pdf_path.name, {}).get("url", None)
if "zonage" in doc.titre.lower() and "zonage" not in self.metadata:
self.metadata["zonage"] = extract_zonage(doc)
zonage = extract_zonage(doc)
if zonage is not None:
self.metadata["zonage"] = zonage
return doc

def analyse(self, iob: Iterable[T_obj], pdf_path: Path):
Expand Down
2 changes: 1 addition & 1 deletion alexi/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(self, metadata: Optional[dict] = None):
self.metadata = {"docs": {}} if metadata is None else metadata
self.numeros = {}
self.titles = {}
self.urls = set()
self.urls: set[str] = set()
for docpath, info in self.metadata["docs"].items():
self.numeros[info["numero"]] = docpath
self.titles[normalize_title(info["titre"])] = docpath
Expand Down

0 comments on commit 2e420c0

Please sign in to comment.