Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: JSON decode error #76

Merged
merged 5 commits into from
Jun 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,4 @@ jobs:
python tools/main.py --help
python tools/dump_leveldb.py --help
python tools/dump_localstorage.py --help
python tools/dump_sessionstorage.py --help
- name: Calculate diff 👽
run: |
git diff --no-index --word-diff .\forensicsim-data\expected-result\jane_doe_v_1_4_00_11161.json jane_doe_v_1_4_00_11161.json
git diff --no-index --word-diff .\forensicsim-data\expected-result\john_doe_v_1_4_00_11161.json john_doe_v_1_4_00_11161.json
git diff --no-index --word-diff .\forensicsim-data\expected-result\karelze_v_23306_3309_2530_1346.json karelze_v_23306_3309_2530_1346.json
python tools/dump_sessionstorage.py --help
4 changes: 1 addition & 3 deletions src/forensicsim/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ def write_results_to_json(data: list[dict[str, Any]], outputpath: Path) -> None:
# Dump messages into a json file
try:
with open(outputpath, "w", encoding="utf-8") as f:
json.dump(
data, f, indent=4, sort_keys=True, default=str, ensure_ascii=False
)
json.dump(data, f, indent=4, default=str, ensure_ascii=False)
except OSError as e:
print(e)
28 changes: 18 additions & 10 deletions src/forensicsim/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import warnings
from dataclasses import dataclass, field
from datetime import datetime
from json import JSONDecodeError
from pathlib import Path
from typing import Any, Optional, Union

Expand All @@ -26,15 +27,22 @@ def strip_html_tags(value: str) -> str:


def decode_dict(properties: Union[bytes, str, dict]) -> dict[str, Any]:
    """Normalize *properties* into a dictionary.

    * ``bytes`` are decoded to text with the encoding BeautifulSoup detects
      (undecodable bytes are dropped) and then parsed as JSON.
    * ``str`` is parsed as JSON.
    * ``dict`` is updated in place: every string value that starts with
      ``[`` or ``{`` is replaced by its parsed JSON value, and the same
      dict object is returned.

    All JSON parsing uses ``strict=False``.

    Returns:
        The decoded dictionary, or an empty dict (after printing the error
        and the offending input) when any JSON parsing step fails.
    """
    try:
        if isinstance(properties, bytes):
            soup = BeautifulSoup(properties, features="html.parser")
            # original_encoding may be None when detection fails (e.g. empty
            # input); bytes.decode() rejects encoding=None, so fall back to
            # UTF-8 instead of raising TypeError.
            properties = properties.decode(
                encoding=soup.original_encoding or "utf-8", errors="ignore"
            )
        if isinstance(properties, dict):
            # Nested children that are dicts or lists may arrive serialized
            # as strings; expand them into real objects.
            for key, value in properties.items():
                if isinstance(value, str) and value.startswith(("[", "{")):
                    properties[key] = json.loads(value, strict=False)
            return properties
        # Parse the top-level payload inside the same try block so that a
        # malformed document is reported like a malformed nested value
        # instead of escaping as an unhandled JSONDecodeError.
        return json.loads(properties, strict=False)
    except JSONDecodeError as e:
        print(e)
        print("Couldn't decode dictionary ", properties)
        return {}

Expand Down Expand Up @@ -292,7 +300,7 @@ def _parse_reply_chains(reply_chains: list[dict], version: str) -> set[Message]:
elif version == "v2":
rc |= {"cached_deduplication_key": md.get("dedupeKey")}
rc |= {"clientmessageid": md.get("clientMessageId")}
# set to clientArrivalTime as compose Time is no longer present
# set to clientArrivalTime as compose time is no longer present
rc |= {"composetime": md.get("clientArrivalTime")}
rc |= {"contenttype": md.get("contentType")}
# set to clientArrivalTime as created time is no longer present
Expand Down