diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 857d717..9d191e6 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -25,7 +25,6 @@ jobs: .\dist\ms_teams_parser.exe -f ".\forensicsim-data\jane_doe_v_1_4_00_11161\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb" -o "jane_doe_v_1_4_00_11161.json" .\dist\ms_teams_parser.exe -f ".\forensicsim-data\john_doe_v_1_4_00_11161\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb" -o "john_doe_v_1_4_00_11161.json" .\dist\ms_teams_parser.exe -f ".\forensicsim-data\karelze_v_23306_3309_2530_1346\IndexedDB\https_teams.live.com_0.indexeddb.leveldb" -o "karelze_v_23306_3309_2530_1346.json" - .\dist\ms_teams_parser.exe -f ".\forensicsim-data\mboufahja_v_23231_413_2355_7555\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb" -o "mboufahja_v_23231_413_2355_7555.json" - name: Upload results📲 uses: actions/upload-artifact@v4 with: @@ -34,7 +33,6 @@ jobs: jane_doe_v_1_4_00_11161.json john_doe_v_1_4_00_11161.json karelze_v_23306_3309_2530_1346.json - mboufahja_v_23231_413_2355_7555.json retention-days: 1 - name: Test calling script 🖱️ run: | @@ -43,8 +41,7 @@ jobs: python tools/dump_localstorage.py --help python tools/dump_sessionstorage.py --help - name: Calculate diff 👽 - run: | + run: | git diff --no-index --word-diff .\forensicsim-data\expected-result\jane_doe_v_1_4_00_11161.json jane_doe_v_1_4_00_11161.json git diff --no-index --word-diff .\forensicsim-data\expected-result\john_doe_v_1_4_00_11161.json john_doe_v_1_4_00_11161.json git diff --no-index --word-diff .\forensicsim-data\expected-result\karelze_v_23306_3309_2530_1346.json karelze_v_23306_3309_2530_1346.json - git diff --no-index --word-diff .\forensicsim-data\expected-result\mboufahja_v_23231_413_2355_7555.json mboufahja_v_23231_413_2355_7555.json diff --git a/forensicsim-data b/forensicsim-data index 15fa701..9653c94 160000 --- a/forensicsim-data +++ b/forensicsim-data @@ -1 +1 @@ -Subproject commit 15fa701e224d0b11f3e726758c4cf1af33b3c804 +Subproject commit 9653c9400508920de0717f0ade5f1458b93c69ec diff --git a/src/forensicsim/parser.py b/src/forensicsim/parser.py index eb465e9..f763c64 100644 --- a/src/forensicsim/parser.py +++ b/src/forensicsim/parser.py @@ -1,4 +1,5 @@ import json +import logging import warnings from dataclasses import dataclass, field from datetime import datetime @@ -189,10 +190,11 @@ def _parse_people(people: list[dict], version: str) -> set[Contact]: and p.get("mri") is not None and version in ("v1", "v2") ): - p |= p.get("value", {}) - parsed_people.add(Contact.from_json(json.dumps(p))) + parsed_people.add(Contact.from_dict(p | p.get("value", {}))) else: - print("Teams Version is unknown. Can not extract records of type people.") + logging.warning( + "Teams Version is unknown. Can not extract records of type people." + ) return parsed_people @@ -200,77 +202,67 @@ def _parse_buddies(buddies: list[dict], version: str) -> set[Contact]: parsed_buddies = set() for b in buddies: - # Skip empty records b_value = b.get("value", {}) - # Fetch relevant data if b_value and version in ("v1", "v2"): buddies_of_b = b_value.get("buddies", []) for b_of_b in buddies_of_b: - parsed_buddies.add(Contact.from_json(json.dumps(b_of_b))) + parsed_buddies.add(Contact.from_dict(b_of_b)) else: - print("Teams Version is unknown. Can not extract records of type buddies.") + logging.warning( + "Teams Version is unknown. Can not extract records of type buddies." + ) return parsed_buddies -# Conversations can contain multiple artefacts -# -> If type:Meeting then its a meeting def _parse_conversations(conversations: list[dict], version: str) -> set[Meeting]: cleaned_conversations = set() + for c in conversations: - if c.get("value") is not None and version in ("v1", "v2"): - if c.get("value", {}).get("type", "") == "Meeting" and "meeting" in c.get( - "value", {} - ).get("threadProperties", {}): - c |= c.get("value", {}) - c |= {"cached_deduplication_key": c.get("id")} - cleaned_conversations.add(Meeting.from_json(json.dumps(c))) + value = c.get("value", {}) + thread_properties = value.get("threadProperties", {}) + # Conversations can contain multiple artefacts. Filter only for meetings. + if version in ("v1", "v2") and "meeting" in thread_properties: + c |= value + c |= {"cached_deduplication_key": c.get("id")} + cleaned_conversations.add(Meeting.from_dict(c)) else: - print("Teams Version is unknown. Can not extract records of type meeting.") + logging.warning( + "Teams Version is unknown. Can not extract records of type meeting." + ) return cleaned_conversations def _parse_reply_chains(reply_chains: list[dict], version: str) -> set[Message]: cleaned_reply_chains = set() + for rc in reply_chains: + rc_value = rc.get("value", {}) + # Skip empty records - if rc["value"] is None: + if not rc_value: continue # Fetch relevant data - rc |= rc.get("value", {}) - rc |= {"origin_file": rc.get("origin_file")} - + rc |= rc_value message_dict = {} if version == "v1": - message_dict = rc.get("value", {}).get("messages", {}) + message_dict = rc_value.get("messages", {}) elif version == "v2": - message_dict = rc.get("value", {}).get("messageMap", {}) + message_dict = rc_value.get("messageMap", {}) else: - print( + logging.warning( "Teams Version is unknown. Can not extract records of type reply_chains." ) continue for k in message_dict: md = message_dict[k] - if ( - md.get("messagetype", "") == "RichText/Html" - or md.get("messagetype", "") == "Text" - or md.get("messageType", "") == "RichText/Html" - or md.get("messageType", "") == "Text" - ): - if version == "v1": - rc |= {"cached_deduplication_key": md.get("cachedDeduplicationKey")} - rc |= {"clientmessageid": md.get("clientmessageid")} - rc |= {"composetime": md.get("composetime")} - rc |= {"contenttype": md.get("contenttype")} - rc |= {"created_time": md.get("createdTime")} - rc |= {"is_from_me": md.get("isFromMe")} - rc |= {"messagetype": md.get("messagetype")} - rc |= {"messageKind": md.get("messageKind")} - rc |= {"original_arrival_time": md.get("originalarrivaltime")} - - elif version == "v2": + if md.get("messagetype", "") in ("RichText/Html", "Text") or md.get( + "messageType" + ) in ("RichText/Html", "Text"): + rc |= md + # map to teams 1.x keys + if version == "v2": rc |= {"cached_deduplication_key": md.get("dedupeKey")} rc |= {"clientmessageid": md.get("clientMessageId")} # set to clientArrivalTime as compose Time is no longer present @@ -280,15 +272,6 @@ def _parse_reply_chains(reply_chains: list[dict], version: str) -> set[Message]: rc |= {"created_time": md.get("clientArrivalTime")} rc |= {"is_from_me": md.get("isSentByCurrentUser")} rc |= {"messagetype": md.get("messageType")} - rc |= {"original_arrival_time": md.get("originalArrivalTime")} - - # Similar across versions - rc |= {"creator": md.get("creator")} - rc |= {"conversation_id": md.get("conversationId")} - rc |= {"content": md.get("content")} - rc |= {"client_arrival_time": md.get("clientArrivalTime")} - rc |= {"version": md.get("version")} - rc |= {"properties": md.get("properties")} cleaned_reply_chains.add(Message.from_dict(rc)) diff --git a/tools/Forensicsim_Parser.py b/tools/Forensicsim_Parser.py index ce35ec1..3b35d94 100644 --- a/tools/Forensicsim_Parser.py +++ b/tools/Forensicsim_Parser.py @@ -88,7 +88,7 @@ # Factory that defines the name and details of the module and allows Autopsy # to create instances of the modules that will do the analysis. class ForensicIMIngestModuleFactory(IngestModuleFactoryAdapter): - def __init__(self) -> None: + def __init__(self): self.settings = None moduleName = "Microsoft Teams Parser"