Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Url phishing analysis framework #2552

Merged
merged 158 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from 144 commits
Commits
Show all changes
158 commits
Select commit Hold shift + click to select a range
c5422a4
Added selenium dependency
fgibertoni Apr 30, 2024
892e6d9
Added driver creation with custom proxy
fgibertoni May 2, 2024
041436f
Merge branch 'refs/heads/develop' into url-phishing-analysis-framework
fgibertoni Jun 3, 2024
347c6fe
Fixed load_env() parsing of .env files
fgibertoni Jun 27, 2024
03d2977
Merge branch 'refs/heads/fix-load-env-parsing' into url-phishing-anal…
fgibertoni Jun 27, 2024
a5bbc4f
Merge branch 'develop' of github.com:intelowlproject/IntelOwl into ur…
fgibertoni Jun 28, 2024
c1accc4
Added config proxy server as parameter
fgibertoni Jun 28, 2024
d2ecd34
Added return for source and server response
fgibertoni Jun 28, 2024
980d141
Also added request data
fgibertoni Jun 28, 2024
c8b991f
Added error handling
fgibertoni Jun 28, 2024
b8d6c52
Added mockup test
fgibertoni Jun 28, 2024
e836caa
Added default value for parameters
fgibertoni Jun 28, 2024
8359dd1
Converted proxy port to int instead of string
fgibertoni Jul 1, 2024
6cb76ae
Added migration for new analyzer
fgibertoni Jul 1, 2024
3bcc345
Merge branch 'refs/heads/develop' into playbook-phishing-analysis
fgibertoni Jul 29, 2024
2680454
Merge branch 'refs/heads/develop' into playbook-phishing-analysis
fgibertoni Jul 29, 2024
cbdc205
Moved migration to avoid conflicts
fgibertoni Jul 29, 2024
91c6866
Updated selenium version
fgibertoni Jul 30, 2024
76a5080
Merge branch 'develop' of github.com:intelowlproject/IntelOwl into pl…
fgibertoni Jul 30, 2024
ab928a0
Changed migration name
fgibertoni Aug 26, 2024
b2efd3f
Added more logging
fgibertoni Aug 26, 2024
8ef58e6
Added phishing analyzer integration
fgibertoni Sep 9, 2024
f3a11a6
Removed deprecated version tag from compose
fgibertoni Sep 10, 2024
2596f41
Removed selenium from IO requirements
fgibertoni Sep 11, 2024
54fc329
Re-added requirements in integration
fgibertoni Sep 11, 2024
46dc9a8
Used Ubuntu 24.04.1 python version
fgibertoni Sep 11, 2024
7673c55
Used UC
fgibertoni Sep 11, 2024
517ce92
First working draft with shell2http flask and docker image
fgibertoni Sep 11, 2024
f287f5f
Improved Dockerfile directives order
fgibertoni Sep 12, 2024
a1ba84d
Fixed headless ambiguity
fgibertoni Sep 12, 2024
2c75fc9
Used selenium and created analysis service. first working analysis
fgibertoni Sep 13, 2024
df84084
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Sep 13, 2024
c034596
Merge branch 'url-phishing-analysis-framework' into playbook-phishing…
fgibertoni Sep 13, 2024
5bd735d
Fixed conflict with project-requirements.txt
fgibertoni Sep 13, 2024
ec86e48
Removed unused file
fgibertoni Sep 13, 2024
332f775
Removed old migration
fgibertoni Sep 13, 2024
e8b811e
Fixed docker permissions
fgibertoni Sep 13, 2024
56eeeec
Ran entrypoint as unprivileged user using sudoers file
fgibertoni Sep 17, 2024
4aa7060
Removed headless parameter and added --no-sandbox option explanation
fgibertoni Sep 17, 2024
9af4481
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Sep 17, 2024
8ecbff3
Merge branch 'url-phishing-analysis-framework' into playbook-phishing…
fgibertoni Sep 17, 2024
d528b84
Restricted permissions and injection points
fgibertoni Sep 17, 2024
63737a0
Added another level of nesting for pivoting
fgibertoni Sep 17, 2024
2fc2716
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Sep 18, 2024
1115496
Merge branch 'url-phishing-analysis-framework' into playbook-phishing…
fgibertoni Sep 18, 2024
b8ee6cb
Added migrations for analyzer, pivot and playbooks
fgibertoni Sep 18, 2024
0f5069d
Added update method and renamed analyzer
fgibertoni Sep 18, 2024
acdbb09
Renamed file to new name
fgibertoni Sep 18, 2024
6848860
Fixed deepsource docker
fgibertoni Sep 18, 2024
7b27d3c
Merge pull request #2510 from intelowlproject/playbook-phishing-analysis
fgibertoni Sep 18, 2024
b9c5203
fix
federicofantini Sep 18, 2024
1c5e302
Fixed migration ordering
fgibertoni Sep 18, 2024
ce5d497
Merge remote-tracking branch 'refs/remotes/origin/fix-migration-deps'…
fgibertoni Sep 18, 2024
d3a1b08
Fixed migration docker based analyzer
fgibertoni Sep 18, 2024
f56221a
Removed classification parameter
fgibertoni Sep 18, 2024
6857809
Fixed empty url error
fgibertoni Sep 20, 2024
141fb53
Fixed renaming
fgibertoni Sep 20, 2024
b4d0494
Removed 'report' duplicate
fgibertoni Sep 20, 2024
774db09
Set window size
fgibertoni Sep 20, 2024
fcd5eb2
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Sep 20, 2024
11a9d12
Moved to seleniumbase
fgibertoni Sep 23, 2024
065910e
Added http traffic to output
fgibertoni Sep 23, 2024
58c9b4d
Used new variable format for docker
fgibertoni Sep 24, 2024
762f714
Added serialized traffic dump to report
fgibertoni Sep 24, 2024
1bc8835
Merge pull request #2518 from intelowlproject/http-traffic-intercept
fgibertoni Sep 24, 2024
41ced47
Added new serializer file
fgibertoni Sep 24, 2024
91226e6
Removed absolute imports
fgibertoni Sep 24, 2024
057df6e
Fixed load and dump of requests
fgibertoni Sep 25, 2024
c54237b
Removed driver download message
fgibertoni Sep 26, 2024
12f005c
Fixed migrations types
fgibertoni Sep 27, 2024
c29ab60
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Sep 27, 2024
94f7958
Removed page_extraction key
fgibertoni Oct 8, 2024
2d9235f
Fixed field_to_compare and made base64
fgibertoni Oct 8, 2024
2deeeaa
Renamed endpoint
fgibertoni Oct 8, 2024
226083b
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Oct 8, 2024
5af3506
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Oct 8, 2024
e20e4f7
Added new file analyzer
fgibertoni Oct 8, 2024
2b57487
Added ascii decoding
fgibertoni Oct 8, 2024
58a843d
Added new analyzer
fgibertoni Oct 8, 2024
d345e02
Added JS support for certain websites
fgibertoni Oct 9, 2024
9211ddf
Added observable name extraction
fgibertoni Oct 9, 2024
1949b7e
Improved log message
fgibertoni Oct 9, 2024
d5453e8
Added docker based analyzer with request
fgibertoni Oct 9, 2024
c73b24a
Refactoring of integration folder structure
fgibertoni Oct 9, 2024
a3a5380
Fixed log message
fgibertoni Oct 9, 2024
3c97e6b
Removed unused target parameter
fgibertoni Oct 9, 2024
2435962
Moved driver message silencer to driver_wrapper
fgibertoni Oct 9, 2024
784d36b
Fixed naming consistency
fgibertoni Oct 10, 2024
6f2f09c
Added first draft of form compiling
fgibertoni Oct 10, 2024
55b786a
Fixed migration names
fgibertoni Oct 10, 2024
a2466c4
Fixed analyzer name in migration
fgibertoni Oct 10, 2024
1dd88bb
Added form retrieval
fgibertoni Oct 11, 2024
8fd64d8
Improved XPath selector. Removed div to reduce noise
fgibertoni Oct 14, 2024
0f3fc8d
Added method to dump requests format proxy
fgibertoni Oct 15, 2024
3d46d60
Replaced typehint from list to []
fgibertoni Oct 15, 2024
c2e7c7a
Fixed first prototype of form compilation with proxy added
fgibertoni Oct 15, 2024
7286996
Added input validation for proxy wrapper
fgibertoni Oct 15, 2024
53ef52b
Removed lower() method
fgibertoni Oct 15, 2024
be9f2b9
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Oct 16, 2024
c6ff62e
Fixed urlparse attribute
fgibertoni Oct 16, 2024
b852ee9
Typo in logger name
fgibertoni Oct 16, 2024
994d6f0
Added encoding handling and more logs
fgibertoni Oct 16, 2024
f914d9e
Added first request parsing
fgibertoni Oct 16, 2024
80e1ff4
Minor refactoring
fgibertoni Oct 17, 2024
4a62801
Added text input mapping
fgibertoni Oct 17, 2024
2404c03
Refactored code to not use docker
fgibertoni Oct 17, 2024
3c23eb9
Replaced triple-parameter proxy with one string
fgibertoni Oct 17, 2024
74fc4c9
Added every command as absolute
fgibertoni Oct 17, 2024
592bc93
Updated seleniumbase
fgibertoni Oct 21, 2024
12816d5
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Oct 21, 2024
fe8ed82
Fixed comment
fgibertoni Oct 21, 2024
c463832
Added selenium window size as parameter
fgibertoni Oct 22, 2024
388fcad
Fixed issue with _init_driver parameter
fgibertoni Oct 22, 2024
324d145
Parametrized and randomized submit data
fgibertoni Oct 23, 2024
60599d0
Extracted mapping from method
fgibertoni Oct 23, 2024
c932461
Removed redundant code
fgibertoni Oct 23, 2024
dc571f0
Added randomization of pin code
fgibertoni Oct 23, 2024
9515b4c
Handled detection of js script
fgibertoni Oct 23, 2024
e814dfb
Added domain type support with default protocol
fgibertoni Oct 23, 2024
21606ba
Fixed faker function
fgibertoni Oct 23, 2024
4b00aee
Added standalone selenium-wire with proxy
fgibertoni Oct 24, 2024
51e7610
Added HAR file to report
fgibertoni Oct 24, 2024
5027116
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Oct 24, 2024
66aaf59
Moved migration to adapt to new one from develop
fgibertoni Oct 24, 2024
de2de1e
Added HAR method and renamed driver attribute
fgibertoni Oct 24, 2024
8da815d
Added explicit no_network parameter
fgibertoni Oct 24, 2024
6e635b9
Removed old TODO
fgibertoni Oct 24, 2024
fc3df92
Removed codefactor warning
fgibertoni Oct 24, 2024
a7a079e
Replaced Exception with warning
fgibertoni Oct 25, 2024
6c0cbf6
Fixed bytes decode error
fgibertoni Oct 25, 2024
95bd45d
Removed ascii encoding
fgibertoni Oct 25, 2024
c838c2a
Handled decode error
fgibertoni Oct 25, 2024
b256729
Fixed code doctor
fgibertoni Oct 28, 2024
2467c8f
Added tracking for new requirements
fgibertoni Nov 4, 2024
e8eb673
Added explaining comment
fgibertoni Nov 4, 2024
79dc5c6
Added basic logging in serialization
fgibertoni Nov 4, 2024
0c27238
Changed TLP to CLEAR
fgibertoni Nov 4, 2024
e78b719
Update api_app/analyzers_manager/file_analyzers/phishing/phishing_for…
fgibertoni Nov 4, 2024
9e1a3da
Update api_app/analyzers_manager/migrations/0124_analyzer_config_phis…
fgibertoni Nov 4, 2024
43f7ec9
Made log level debug
fgibertoni Nov 4, 2024
9c3bb50
Added more logs to driver wrapper
fgibertoni Nov 4, 2024
ae2aaa6
Added various log and error handling
fgibertoni Nov 4, 2024
860540d
Added Job ID
fgibertoni Nov 4, 2024
308e801
Fixed logs and report for form compiler
fgibertoni Nov 5, 2024
af3e318
Fixed proxy address adding protocol
fgibertoni Nov 5, 2024
3e29422
Removed f-string nesting
fgibertoni Nov 5, 2024
de05908
Used enum in action
fgibertoni Nov 5, 2024
c9615d6
Merge branch 'develop' into url-phishing-analysis-framework
fgibertoni Nov 5, 2024
d8145ea
Fixed migration order
fgibertoni Nov 5, 2024
e194fac
Fixed migration order
fgibertoni Nov 5, 2024
c95ea26
Removed wrongly returned report
fgibertoni Nov 5, 2024
feca528
Merge branch 'develop' of github.com:intelowlproject/IntelOwl into ur…
fgibertoni Nov 5, 2024
66ff1d9
Added docker to dependabot
fgibertoni Nov 5, 2024
1e858b7
Fixed comments
fgibertoni Nov 5, 2024
6467c04
Removed Enum
fgibertoni Nov 5, 2024
892793a
Fixed linter
fgibertoni Nov 6, 2024
e250af6
Removed typo duplicated line
fgibertoni Nov 6, 2024
6ac0692
Removed a level in decorator
fgibertoni Nov 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ updates:
- dependency-name: "*"
update-types: [ "version-update:semver-patch" ]

- package-ecosystem: "pip"
fgibertoni marked this conversation as resolved.
Show resolved Hide resolved
directory: "/integrations/phishing_analyzers"
schedule:
interval: "weekly"
day: "tuesday"
target-branch: "dependabot-validation"
ignore:
# ignore all patch updates since we are using ~=
# this does not work for security updates
- dependency-name: "*"
update-types: [ "version-update:semver-patch" ]

- package-ecosystem: "npm"
directory: "/frontend"
schedule:
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
import logging
from datetime import date, timedelta
from typing import Dict
from urllib.parse import urljoin

import requests
from faker import Faker
from lxml.etree import HTMLParser
from lxml.html import document_fromstring
from requests import HTTPError, Response

from api_app.analyzers_manager.classes import FileAnalyzer
from api_app.models import PythonConfig

# module-level logger, named after this module
logger = logging.getLogger(__name__)
# single Faker instance shared by the whole module to generate fake form data
fake = Faker()


def xpath_query_on_page(page, xpath_selector: str) -> []:
    """Evaluate ``xpath_selector`` on ``page`` and return whatever it yields.

    ``page`` only needs to expose an ``xpath()`` method; the result of that
    call is handed back untouched.
    """
    matches = page.xpath(xpath_selector)
    return matches


class PhishingFormCompiler(FileAnalyzer):
fgibertoni marked this conversation as resolved.
Show resolved Hide resolved
# good short guide for writing XPath expressions
# https://upg-dh.newtfire.org/explainXPath.html
# we're supporting XPath up to v3.1 with elementpath package
xpath_form_selector: str = ""
xpath_js_selector: str = ""
proxy_address: str = ""

name_matching: list = []
cc_matching: list = []
pin_matching: list = []
cvv_matching: list = []
expiration_date_matching: list = []

# mapping between name attribute of text <input>
# and their corresponding fake values
_name_text_input_mapping: {tuple: str} = {
tuple(name_matching): fake.user_name(),
tuple(cc_matching): fake.credit_card_number(),
tuple(pin_matching): str(fake.random.randint(10000, 100000)),
tuple(cvv_matching): fake.credit_card_security_code(),
tuple(expiration_date_matching): fake.credit_card_expire(
start=date.today(),
end=date.today() + timedelta(days=fake.random.randint(1, 1000)),
date_format="%m/%y",
),
}

FAKE_EMAIL_INPUT: str = fake.email()
FAKE_PASSWORD_INPUT: str = fake.password(
length=16,
special_chars=True,
digits=True,
upper_case=True,
lower_case=True,
)
FAKE_TEL_INPUT: str = fake.phone_number()

def __init__(
self,
config: PythonConfig,
**kwargs,
):
super().__init__(config, **kwargs)
self.target_site: str = ""
self.html_source_code: str = ""
self.parsed_page = None
self.args: [] = []

def config(self, runtime_configuration: Dict):
super().config(runtime_configuration)
if hasattr(self._job, "pivot_parent"):
# extract target site from parent job
self.target_site = self._job.pivot_parent.starting_job.observable_name
else:
logger.warning(
f"Job #{self.job_id}: Analyzer {self.analyzer_name} should be ran from PhishingAnalysis playbook."
)
if self.target_site:
logger.info(
f"Job #{self.job_id}: Extracted {self.target_site} from parent job."
)
else:
logger.info(
f"Job #{self.job_id}: Target site from parent job not found! Proceeding with only source code."
)

# extract and decode source code from file
self.html_source_code = self.read_file_bytes()
if self.html_source_code:
logger.debug(f"Job #{self.job_id}: {self.html_source_code=}")
try:
self.html_source_code = self.html_source_code.decode("utf-8")
except UnicodeDecodeError as e:
logger.warning(
f"Job #{self.job_id}: Error during HTML source page decoding: {e}\nTrying to fix the error..."
)
self.html_source_code = self.html_source_code.decode(
"utf-8", errors="replace"
)
else:
logger.info(
f"Job #{self.job_id}: Extracted html source code from pivot."
)
else:
raise ValueError("Failed to extract source code from pivot!")

# recover=True tries to read not well-formed HTML
html_parser = HTMLParser(recover=True, no_network=True)
self.parsed_page = document_fromstring(
self.html_source_code, parser=html_parser
)

def search_phishing_forms_xpath(self) -> []:
# extract using a custom XPath selector if set
return (
xpath_query_on_page(self.parsed_page, self.xpath_form_selector)
if self.xpath_form_selector
else []
)

def identify_text_input(self, input_name: str) -> str:
for names, fake_value in self._name_text_input_mapping.items():
if input_name in names:
return fake_value

def compile_form_field(self, form) -> (dict, str):
result: {} = {}
# setting default to page itself if action is not specified
if not (form_action := form.get("action", None)):
form_action = self.target_site
for element in form.findall(".//input"):
input_type: str = element.get("type", None)
input_name: str = element.get("name", None)
input_value: str = element.get("value", None)
value_to_set: str = ""
match input_type.lower():
case "hidden":
logger.info(
f"Job #{self.job_id}: Found hidden input tag with {input_name=} and {input_value=}"
)
value_to_set = input_value

case "text":
value_to_set = self.identify_text_input(input_name)
case "password":
value_to_set = self.FAKE_PASSWORD_INPUT
case "tel":
value_to_set = self.FAKE_TEL_INPUT
case "email":
value_to_set = self.FAKE_EMAIL_INPUT
case _:
logger.info(
f"Job #{self.job_id}: {input_type.lower()} is not supported yet!"
)

logger.info(
f"Job #{self.job_id}: Sending value {value_to_set} for {input_name=}"
Dismissed Show dismissed Hide dismissed
)
result.setdefault(input_name, value_to_set)
return result, form_action

def perform_request_to_form(self, form) -> Response:
params, dest_url = self.compile_form_field(form)
logger.info(f"Job #{self.job_id}: Sending {params=} to submit url {dest_url}")
return requests.post(
url=dest_url,
params=params,
data=params,
proxies=(
{"http": self.proxy_address, "https": self.proxy_address}
if self.proxy_address
else None
),
)

@staticmethod
def handle_3xx_response(response: Response) -> [str]:
# extract all redirection history
return [history.request.url for history in response.history]

@staticmethod
def handle_2xx_response(response: Response) -> str:
return response.request.url

def is_js_used_in_page(self) -> bool:
js_tag: [] = xpath_query_on_page(self.parsed_page, self.xpath_js_selector)
if js_tag:
logger.info(f"Job #{self.job_id}: Found script tag: {js_tag}")
return bool(js_tag)

def analyze_responses(self, responses: [Response]) -> {}:
result: [] = []
for response in responses:
try:
# handle 4xx and 5xx
response.raise_for_status()
except HTTPError as e:
message = f"Error during request to {response.request.url}: {e}"
logger.error(f"Job #{self.job_id}:" + message)
self.report.errors.append(message)
else:
if response.history:
result.extend(self.handle_3xx_response(response))

result.append(self.handle_2xx_response(response))
self.report.save()

return result

def run(self) -> dict:
result: {} = {}
if not (
forms := xpath_query_on_page(self.parsed_page, self.xpath_form_selector)
):
message = (
f"Form not found in {self.target_site=} with "
f"{self.xpath_form_selector=}! This could mean that the XPath"
f" selector requires some tuning."
)
logger.warning(f"Job #{self.job_id}: " + message)
self.report.errors.append(message)
self.report.save()
return self.report
logger.info(
f"Job #{self.job_id}: Found {len(forms)} forms in page {self.target_site}"
)

responses: [Response] = []
for form in forms:
responses.append(self.perform_request_to_form(form))

result.setdefault("extracted_urls", self.analyze_responses(responses))
result.setdefault("has_javascript", self.is_js_used_in_page())
return result

def update(self) -> bool:
pass
Loading
Loading