From c4abb263085f5a824d7507a5c0353188b73ec3b5 Mon Sep 17 00:00:00 2001 From: Alexander Bilz Date: Sun, 30 Jun 2024 12:58:40 +0200 Subject: [PATCH] fix: JSON decode error (#76) * fix: JSON decode error --- src/forensicsim/backend.py | 4 +--- src/forensicsim/parser.py | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/forensicsim/backend.py b/src/forensicsim/backend.py index eef8ea6..70c0906 100644 --- a/src/forensicsim/backend.py +++ b/src/forensicsim/backend.py @@ -209,8 +209,6 @@ def write_results_to_json(data: list[dict[str, Any]], outputpath: Path) -> None: # Dump messages into a json file try: with open(outputpath, "w", encoding="utf-8") as f: - json.dump( - data, f, indent=4, sort_keys=True, default=str, ensure_ascii=False - ) + json.dump(data, f, indent=4, default=str, ensure_ascii=False) except OSError as e: print(e) diff --git a/src/forensicsim/parser.py b/src/forensicsim/parser.py index 3ec3084..f7fadad 100644 --- a/src/forensicsim/parser.py +++ b/src/forensicsim/parser.py @@ -2,6 +2,7 @@ import warnings from dataclasses import dataclass, field from datetime import datetime +from json import JSONDecodeError from pathlib import Path from typing import Any, Optional, Union @@ -26,15 +27,22 @@ def strip_html_tags(value: str) -> str: def decode_dict(properties: Union[bytes, str, dict]) -> dict[str, Any]: - if isinstance(properties, bytes): - soup = BeautifulSoup(properties, features="html.parser") - properties = properties.decode(soup.original_encoding) - if isinstance(properties, dict): - # handle case where nested childs are dicts or list but provided with "" but have to be expanded. - for key, value in properties.items(): - if isinstance(value, str) and value.startswith(("[", "{")): - properties[key] = json.loads(value, strict=False) - return properties + try: + if isinstance(properties, bytes): + soup = BeautifulSoup(properties, features="html.parser") + properties = properties.decode( + encoding=soup.original_encoding, errors="ignore" + ) + if isinstance(properties, dict): + # handle case where nested childs are dicts or list but provided with "" but have to be expanded. + for key, value in properties.items(): + if isinstance(value, str) and value.startswith(("[", "{")): + properties[key] = json.loads(value, strict=False) + return properties + except JSONDecodeError as e: + print(e) + print("Couldn't decode dictionary ", properties) + return {} return json.loads(properties, strict=False) @@ -292,7 +300,7 @@ def _parse_reply_chains(reply_chains: list[dict], version: str) -> set[Message]: elif version == "v2": rc |= {"cached_deduplication_key": md.get("dedupeKey")} rc |= {"clientmessageid": md.get("clientMessageId")} - # set to clientArrivalTime as compose Time is no longer present + # set to clientArrivalTime as compose time is no longer present rc |= {"composetime": md.get("clientArrivalTime")} rc |= {"contenttype": md.get("contentType")} # set to clientArrivalTime as created time is no longer present