Skip to content

Commit

Permalink
Update data_preparation.py
Browse files Browse the repository at this point in the history
  • Loading branch information
fmadore committed Nov 30, 2024
1 parent 80b1861 commit 6ab1f69
Showing 1 changed file with 14 additions and 84 deletions.
98 changes: 14 additions & 84 deletions Visualisations/Overview/items_type_over_years/data_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
# Get the directory of the current script
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Directory (a "data" subfolder next to this script) used for the cache and
# visualization output files; it is created at import time so later writes
# do not fail on a missing folder.
DATA_DIR = os.path.join(SCRIPT_DIR, "data")
os.makedirs(DATA_DIR, exist_ok=True) # Create data directory if it doesn't exist

# Configure logging
logging.basicConfig(
level=logging.INFO,
Expand All @@ -29,8 +33,8 @@ class Config:
ITEMS_PER_PAGE: int = 50
MAX_WORKERS: int = 5
CACHE_DURATION_HOURS: int = 24
CACHE_FILE: Path = field(default_factory=lambda: Path(SCRIPT_DIR) / "data_cache.json")
OUTPUT_FILE: Path = field(default_factory=lambda: Path(SCRIPT_DIR) / "visualization_data.json")
CACHE_FILE: Path = field(default_factory=lambda: Path(DATA_DIR) / "data_cache.json")
OUTPUT_FILE: Path = field(default_factory=lambda: Path(DATA_DIR) / "visualization_data.json")

# Resource class IDs and their translations
ACCEPTABLE_IDS: Dict[int, Dict[str, str]] = field(default_factory=lambda: {
Expand All @@ -50,80 +54,6 @@ class Config:
305: {"en": "Blog article", "fr": "Article de blog"}
})

# Color palette with bilingual labels
COLOR_PALETTE: Dict[str, Dict[str, str]] = field(default_factory=lambda: {
"Blog article": {
"en": "Blog article",
"fr": "Article de blog",
"color": "#FF6B6B"
},
"Press article": {
"en": "Press article",
"fr": "Article de presse",
"color": "#4ECDC4"
},
"Journal article": {
"en": "Journal article",
"fr": "Article de revue",
"color": "#45B7D1"
},
"Book": {
"en": "Book",
"fr": "Livre",
"color": "#96CEB4"
},
"Book review": {
"en": "Book review",
"fr": "Compte rendu de livre",
"color": "#FFEEAD"
},
"Chapter": {
"en": "Chapter",
"fr": "Chapitre",
"color": "#D4A5A5"
},
"Communication": {
"en": "Communication",
"fr": "Communication",
"color": "#9B9B9B"
},
"Edited volume": {
"en": "Edited volume",
"fr": "Ouvrage collectif",
"color": "#FFD93D"
},
"Image": {
"en": "Image",
"fr": "Image",
"color": "#6C5B7B"
},
"Islamic newspaper": {
"en": "Islamic newspaper",
"fr": "Journal islamique",
"color": "#C06C84"
},
"Other document": {
"en": "Other document",
"fr": "Document divers",
"color": "#F8B195"
},
"Report": {
"en": "Report",
"fr": "Rapport",
"color": "#355C7D"
},
"Thesis": {
"en": "Thesis",
"fr": "Thèse",
"color": "#99B898"
},
"Audiovisual document": {
"en": "Audiovisual document",
"fr": "Document audiovisuel",
"color": "#2A363B"
}
})

class DataFetcher:
"""Handles all data fetching operations with caching capability."""

Expand Down Expand Up @@ -236,21 +166,21 @@ def prepare_visualization_data(items_by_year_type: DefaultDict[str, DefaultDict[
visualization_data = {
"yearlyData": [],
"types": [],
"colors": {},
"translations": {
"types": {},
"total": {"en": "Total", "fr": "Total"}
}
}

# Add type information
for type_key, type_info in config.COLOR_PALETTE.items():
visualization_data["types"].append(type_info["en"])
visualization_data["colors"][type_info["en"]] = type_info["color"]
visualization_data["translations"]["types"][type_info["en"]] = {
"en": type_info["en"],
"fr": type_info["fr"]
}
for type_id, translations in config.ACCEPTABLE_IDS.items():
en_name = translations["en"]
if en_name not in visualization_data["types"]:
visualization_data["types"].append(en_name)
visualization_data["translations"]["types"][en_name] = {
"en": translations["en"],
"fr": translations["fr"]
}

# Add yearly data
for year in sorted(items_by_year_type.keys()):
Expand Down

0 comments on commit 6ab1f69

Please sign in to comment.