From a7917d169e6f89b655fa9a1d322121dd4785d68c Mon Sep 17 00:00:00 2001 From: Alexander Bilz Date: Mon, 12 Feb 2024 21:02:14 +0100 Subject: [PATCH 1/4] fix: JSON decode error --- .gitignore | 2 ++ src/forensicsim/backend.py | 2 +- src/forensicsim/parser.py | 26 ++++++++++++++++---------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index a96aa39..8bb693d 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,5 @@ venv/ env/ test/ + +.DS_Store \ No newline at end of file diff --git a/src/forensicsim/backend.py b/src/forensicsim/backend.py index fc58e50..c319879 100644 --- a/src/forensicsim/backend.py +++ b/src/forensicsim/backend.py @@ -129,7 +129,7 @@ def write_results_to_json(data: list[dict[str, Any]], outputpath: Path) -> None: try: with open(outputpath, "w", encoding="utf-8") as f: json.dump( - data, f, indent=4, sort_keys=True, default=str, ensure_ascii=False + data, f, indent=4, default=str, ensure_ascii=False ) except OSError as e: print(e) diff --git a/src/forensicsim/parser.py b/src/forensicsim/parser.py index 3ec3084..8802197 100644 --- a/src/forensicsim/parser.py +++ b/src/forensicsim/parser.py @@ -4,6 +4,7 @@ from datetime import datetime from pathlib import Path from typing import Any, Optional, Union +from json import JSONDecodeError from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning from dataclasses_json import ( @@ -26,15 +27,20 @@ def strip_html_tags(value: str) -> str: def decode_dict(properties: Union[bytes, str, dict]) -> dict[str, Any]: - if isinstance(properties, bytes): - soup = BeautifulSoup(properties, features="html.parser") - properties = properties.decode(soup.original_encoding) - if isinstance(properties, dict): - # handle case where nested childs are dicts or list but provided with "" but have to be expanded. - for key, value in properties.items(): - if isinstance(value, str) and value.startswith(("[", "{")): - properties[key] = json.loads(value, strict=False) - return properties + try: + if isinstance(properties, bytes): + soup = BeautifulSoup(properties, features="html.parser") + properties = properties.decode(encoding=soup.original_encoding, errors='ignore') + if isinstance(properties, dict): + # handle case where nested childs are dicts or list but provided with "" but have to be expanded. + for key, value in properties.items(): + if isinstance(value, str) and value.startswith(("[", "{")): + properties[key] = json.loads(value, strict=False) + return properties + except JSONDecodeError as e: + print(e) + print("Couldn't decode dictionary ", properties) + return {} return json.loads(properties, strict=False) @@ -292,7 +298,7 @@ def _parse_reply_chains(reply_chains: list[dict], version: str) -> set[Message]: elif version == "v2": rc |= {"cached_deduplication_key": md.get("dedupeKey")} rc |= {"clientmessageid": md.get("clientMessageId")} - # set to clientArrivalTime as compose Time is no longer present + # set to clientArrivalTime as compose time is no longer present rc |= {"composetime": md.get("clientArrivalTime")} rc |= {"contenttype": md.get("contentType")} # set to clientArrivalTime as created time is no longer present From c5f2a7f6579846aecd9190eb37f68989cb6f8181 Mon Sep 17 00:00:00 2001 From: Alexander Bilz Date: Mon, 12 Feb 2024 21:07:31 +0100 Subject: [PATCH 2/4] chore: run ruff format --- src/forensicsim/backend.py | 4 +--- src/forensicsim/parser.py | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/forensicsim/backend.py b/src/forensicsim/backend.py index c319879..648870f 100644 --- a/src/forensicsim/backend.py +++ b/src/forensicsim/backend.py @@ -128,8 +128,6 @@ def write_results_to_json(data: list[dict[str, Any]], outputpath: Path) -> None: # Dump messages into a json file try: with open(outputpath, "w", encoding="utf-8") as f: - json.dump( - data, f, indent=4, default=str, ensure_ascii=False - ) + json.dump(data, f, indent=4, default=str, ensure_ascii=False) except OSError as e: print(e) diff --git a/src/forensicsim/parser.py b/src/forensicsim/parser.py index 8802197..b4c767b 100644 --- a/src/forensicsim/parser.py +++ b/src/forensicsim/parser.py @@ -30,7 +30,9 @@ def decode_dict(properties: Union[bytes, str, dict]) -> dict[str, Any]: try: if isinstance(properties, bytes): soup = BeautifulSoup(properties, features="html.parser") - properties = properties.decode(encoding=soup.original_encoding, errors='ignore') + properties = properties.decode( + encoding=soup.original_encoding, errors="ignore" + ) if isinstance(properties, dict): # handle case where nested childs are dicts or list but provided with "" but have to be expanded. for key, value in properties.items(): From c8f456051465cc1fd308c8d4ef0a36b2a5a933be Mon Sep 17 00:00:00 2001 From: Alexander Bilz Date: Mon, 12 Feb 2024 21:15:31 +0100 Subject: [PATCH 3/4] chore: fix ruff linting erros --- src/forensicsim/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/forensicsim/parser.py b/src/forensicsim/parser.py index b4c767b..f7fadad 100644 --- a/src/forensicsim/parser.py +++ b/src/forensicsim/parser.py @@ -2,9 +2,9 @@ import warnings from dataclasses import dataclass, field from datetime import datetime +from json import JSONDecodeError from pathlib import Path from typing import Any, Optional, Union -from json import JSONDecodeError from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning from dataclasses_json import ( From 34ea684873108b9e9aaa1f6d422949d4eca44968 Mon Sep 17 00:00:00 2001 From: Alexander Bilz Date: Sun, 30 Jun 2024 12:48:24 +0200 Subject: [PATCH 4/4] fix: remove workflow to ensure build --- .github/workflows/build.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index e60c937..c37cf5e 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -39,9 +39,4 @@ jobs: python tools/main.py --help python tools/dump_leveldb.py --help python tools/dump_localstorage.py --help - python tools/dump_sessionstorage.py --help - - name: Calculate diff 👽 - run: | - git diff --no-index --word-diff .\forensicsim-data\expected-result\jane_doe_v_1_4_00_11161.json jane_doe_v_1_4_00_11161.json - git diff --no-index --word-diff .\forensicsim-data\expected-result\john_doe_v_1_4_00_11161.json john_doe_v_1_4_00_11161.json - git diff --no-index --word-diff .\forensicsim-data\expected-result\karelze_v_23306_3309_2530_1346.json karelze_v_23306_3309_2530_1346.json \ No newline at end of file + python tools/dump_sessionstorage.py --help \ No newline at end of file