diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8551da363..cda75213f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: pip install codespell flake8 -r requirements.txt - name: Test run: | - python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o tmp_report.json --format json --force-recursive -R 3 --full-url -q -O + python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o tmp_report.json --output-formats json --force-recursive -R 3 --full-url -q python3 dirsearch.py -w ./tests/static/wordlist.txt -l ./tests/static/targets.txt --subdirs /,admin/ --exclude-extensions conf -q -L -f -i 200 --user-agent a --log tmp_log.log python3 dirsearch.py -w ./tests/static/wordlist.txt --nmap-report ./tests/static/nmap.xml --max-rate 2 -H K:V --random-agent --overwrite-extensions --no-color python3 dirsearch.py -w ./tests/static/wordlist.txt --raw ./tests/static/raw.txt --prefixes . --suffixes ~ --skip-on-status 404 -m POST -d test=1 --crawl --min-response-size 9 diff --git a/CHANGELOG.md b/CHANGELOG.md index 4677bafa5..604665333 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,10 @@ # Changelog ## [Unreleased] +- Ability to use multiple output formats +- MySQL and PostgreSQL report formats +- Support variables in file path and SQL table name for saving results - Support non-default network interface -- Remove unused dependencies (urllib3, cryptography, cffi, idna, chardet) - Load targets from a Nmap XML report - Added --async option to enable asynchronous mode (use coroutines instead of threads) diff --git a/config.ini b/config.ini index 7a005a2ae..dc266ee5f 100644 --- a/config.ini +++ b/config.ini @@ -14,18 +14,18 @@ exclude-subdirs = %%ff/,.;/,..;/,;/,./,../,%%2e/,%%2e%%2e/ random-user-agents = False max-time = 0 exit-on-error = False -# subdirs = /,api/ -# include-status = 200-299,401 -# exclude-status = 400,500-999 -# exclude-sizes = 0b,123gb -# exclude-texts = [ 
-# "Not found", -# "404" -# ] -# exclude-regex = "^403$" -# exclude-redirect = "*/error.html" -# exclude-response = 404.html -# skip-on-status = 429,999 +#subdirs = /,api/ +#include-status = 200-299,401 +#exclude-status = 400,500-999 +#exclude-sizes = 0b,123gb +#exclude-texts = [ +# "Not found", +# "404" +#] +#exclude-regex = "^403$" +#exclude-redirect = "*/error.html" +#exclude-response = 404.html +#skip-on-status = 429,999 [dictionary] default-extensions = php,aspx,jsp,html,js @@ -33,34 +33,34 @@ force-extensions = False overwrite-extensions = False lowercase = False uppercase = False -capitalization = False -# exclude-extensions = old,log -# prefixes = .,admin -# suffixes = ~,.bak -# wordlists = /path/to/wordlist1.txt,/path/to/wordlist2.txt +capital = False +#exclude-extensions = old,log +#prefixes = .,admin +#suffixes = ~,.bak +#wordlists = /path/to/wordlist1.txt,/path/to/wordlist2.txt [request] http-method = get follow-redirects = False -# headers = [ -# "Header1: Value", -# "Header2: Value" -# ] -# headers-file = /path/to/headers.txt -# user-agent = MyUserAgent -# cookie = SESSIONID=123 +#headers = [ +# "Header1: Value", +# "Header2: Value" +#] +#headers-file = /path/to/headers.txt +#user-agent = MyUserAgent +#cookie = SESSIONID=123 [connection] timeout = 7.5 delay = 0 max-rate = 0 max-retries = 1 -## By disabling `scheme` variable, dirsearch will automatically identify the URI scheme -# scheme = http -# proxies = ["localhost:8080"] -# proxies-file = /path/to/proxies.txt -# replay-proxy = localhost:8000 -# network-interface = eth0 +# By disabling `scheme` variable, dirsearch will automatically identify the URI scheme +#scheme = http +#proxies = ["localhost:8080"] +#proxies-file = /path/to/proxies.txt +#replay-proxy = localhost:8000 +#network-interface = eth0 [advanced] crawl = False @@ -72,9 +72,21 @@ color = True show-redirects-history = False [output] -## Support: plain, simple, json, xml, md, csv, html, sqlite, mysql, postgresql -report-format = plain 
-autosave-report = True -autosave-report-folder = reports/ -# log-file = /path/to/dirsearch.log -# log-file-size = 50000000 +# Available: simple, plain, json, xml, md, csv, html, sqlite, mysql, postgresql +output-formats = plain +# Supported variables for 'output-file' and 'output-sql-table': +# - {extension}: File extension of the report, for 'output-file' only (e.g. txt, json) +# - {format}: Output format (e.g. plain, simple, xml) +# - {host}: Target hostname or IP (e.g. example.com) +# - {scheme}: URI scheme (http or https) +# - {port}: Port number (e.g. 443) +# - {date}: Scan date, format: YYYY-MM-DD (e.g. 2022-10-07) +# +# For output formats other than PostgreSQL and MySQL +#output-file = reports/{host}/{scheme}_{port}.{extension} +#mysql-url = mysql://user:password@localhost/database +#postgres-url = postgres://user:password@localhost/database +# Table to be used for SQL output +output-sql-table = {scheme}_{host}:{port} +#log-file = /path/to/dirsearch.log +#log-file-size = 50000000 diff --git a/lib/connection/response.py b/lib/connection/response.py index b9a56fbc9..14937cea5 100755 --- a/lib/connection/response.py +++ b/lib/connection/response.py @@ -16,6 +16,7 @@ # # Author: Mauro Soria +import time import httpx from lib.core.settings import ( @@ -25,11 +26,12 @@ UNKNOWN, ) from lib.parse.url import clean_path, parse_path -from lib.utils.common import is_binary +from lib.utils.common import get_readable_size, is_binary class BaseResponse: def __init__(self, response): + self.datetime = time.strftime("%Y-%m-%d %H:%M:%S") self.url = str(response.url) self.full_path = parse_path(self.url) self.path = clean_path(self.full_path) @@ -54,6 +56,10 @@ def length(self): except TypeError: return len(self.body) + @property + def size(self): + return get_readable_size(self.length) + def __hash__(self): return hash(self.body) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 8abae1871..435598757 100755 --- a/lib/controller/controller.py +++ 
b/lib/controller/controller.py @@ -32,6 +32,8 @@ from lib.core.decorators import locked from lib.core.dictionary import Dictionary, get_blacklists from lib.core.exceptions import ( + CannotConnectException, + FileExistsException, InvalidRawRequest, InvalidURLException, RequestException, @@ -47,23 +49,13 @@ EXTENSION_RECOGNITION_REGEX, MAX_CONSECUTIVE_REQUEST_ERRORS, NEW_LINE, - SCRIPT_PATH, STANDARD_PORTS, UNKNOWN, ) from lib.parse.rawrequest import parse_raw from lib.parse.url import clean_path, parse_path -from lib.reports.csv_report import CSVReport -from lib.reports.html_report import HTMLReport -from lib.reports.json_report import JSONReport -from lib.reports.markdown_report import MarkdownReport -from lib.reports.mysql_report import MySQLReport -from lib.reports.plain_text_report import PlainTextReport -from lib.reports.postgresql_report import PostgreSQLReport -from lib.reports.simple_report import SimpleReport -from lib.reports.sqlite_report import SQLiteReport -from lib.reports.xml_report import XMLReport -from lib.utils.common import get_valid_filename, lstrip_once +from lib.report.manager import ReportManager +from lib.utils.common import lstrip_once from lib.utils.file import FileUtils from lib.utils.pickle import pickle, unpickle from lib.utils.schemedet import detect_scheme @@ -143,12 +135,9 @@ def setup(self): self.requester = Requester() self.dictionary = Dictionary(files=options["wordlists"]) - self.results = [] self.start_time = time.time() self.passed_urls = set() self.directories = [] - self.report = None - self.batch = False self.jobs_processed = 0 self.errors = 0 self.consecutive_errors = 0 @@ -164,8 +153,6 @@ def setup(self): self.requester.set_proxy_auth(options["proxy_auth"]) if options["log_file"]: - options["log_file"] = FileUtils.get_abs_path(options["log_file"]) - try: FileUtils.create_dir(FileUtils.parent(options["log_file"])) if not FileUtils.can_write(options["log_file"]): @@ -179,27 +166,11 @@ def setup(self): ) exit(1) - if 
options["autosave_report"] and not options["output"]: - self.report_path = options["output_path"] or FileUtils.build_path( - SCRIPT_PATH, "reports" - ) - - try: - FileUtils.create_dir(self.report_path) - if not FileUtils.can_write(self.report_path): - raise Exception - - except Exception: - interface.error( - f"Couldn't create report folder at {self.report_path}" - ) - exit(1) - interface.header(BANNER) interface.config(len(self.dictionary)) try: - self.setup_reports() + self.reporter = ReportManager(options["output_formats"]) except ( InvalidURLException, mysql.connector.Error, @@ -219,7 +190,7 @@ def run(self): # error_callbacks callback values: # - *args[0]: exception match_callbacks = ( - self.match_callback, self.reset_consecutive_errors + self.match_callback, self.reporter.save, self.reset_consecutive_errors ) not_found_callbacks = ( self.update_progress_bar, self.reset_consecutive_errors @@ -246,9 +217,12 @@ def run(self): if not self.old_session: interface.target(self.url) + self.reporter.prepare(self.url) self.start() except ( + CannotConnectException, + FileExistsException, InvalidURLException, RequestException, SkipTargetInterrupt, @@ -261,6 +235,7 @@ def run(self): interface.error(str(e)) except QuitInterrupt as e: + self.reporter.finish() interface.error(e.args[0]) exit(0) @@ -268,6 +243,7 @@ def run(self): options["urls"].pop(0) interface.warning("\nTask Completed") + self.reporter.finish() if options["session_file"]: try: @@ -385,95 +361,6 @@ def set_target(self, url): self.requester.set_url(self.url) - def setup_batch_reports(self): - """Create batch report folder""" - - self.batch = True - current_time = time.strftime("%y-%m-%d_%H-%M-%S") - batch_session = f"BATCH-{current_time}" - batch_directory_path = FileUtils.build_path(self.report_path, batch_session) - - try: - FileUtils.create_dir(batch_directory_path) - except Exception: - interface.error(f"Couldn't create batch folder at {batch_directory_path}") - exit(1) - - return batch_directory_path - 
- def get_output_extension(self): - if options["output_format"] in ("plain", "simple"): - return "txt" - - return options["output_format"] - - def setup_reports(self): - """Create report file""" - - output = options["output"] - - if options["autosave_report"] and not output and options["output_format"] not in ("mysql", "postgresql"): - if len(options["urls"]) > 1: - directory_path = self.setup_batch_reports() - filename = "BATCH." + self.get_output_extension() - else: - self.set_target(options["urls"][0]) - - parsed = urlparse(self.url) - - if not parsed.netloc: - parsed = urlparse(f"//{options['urls'][0]}") - - filename = get_valid_filename(f"{parsed.path}_") - filename += time.strftime("%y-%m-%d_%H-%M-%S") - filename += f".{self.get_output_extension()}" - directory_path = FileUtils.build_path( - self.report_path, get_valid_filename(f"{parsed.scheme}_{parsed.netloc}") - ) - - output = FileUtils.get_abs_path((FileUtils.build_path(directory_path, filename))) - - if FileUtils.exists(output): - i = 2 - while FileUtils.exists(f"{output}_{i}"): - i += 1 - - output += f"_{i}" - - try: - FileUtils.create_dir(directory_path) - except Exception: - interface.error( - f"Couldn't create the reports folder at {directory_path}" - ) - exit(1) - - if not output: - return - - if options["output_format"] == "plain": - self.report = PlainTextReport(output) - elif options["output_format"] == "json": - self.report = JSONReport(output) - elif options["output_format"] == "xml": - self.report = XMLReport(output) - elif options["output_format"] == "md": - self.report = MarkdownReport(output) - elif options["output_format"] == "csv": - self.report = CSVReport(output) - elif options["output_format"] == "html": - self.report = HTMLReport(output) - elif options["output_format"] == "sqlite": - self.report = SQLiteReport(output) - elif options["output_format"] == "mysql": - self.report = MySQLReport(output) - elif options["output_format"] == "postgresql": - self.report = PostgreSQLReport(output) 
- else: - self.report = SimpleReport(output) - - interface.output_location(output) - def reset_consecutive_errors(self, response): self.consecutive_errors = 0 @@ -511,10 +398,6 @@ def match_callback(self, response): else: self.requester.request(response.full_path, proxy=options["replay_proxy"]) - if self.report: - self.results.append(response) - self.report.save(self.results) - def update_progress_bar(self, response): jobs_count = ( # Jobs left for unscanned targets diff --git a/lib/core/exceptions.py b/lib/core/exceptions.py index 69b481e8e..6a3940a07 100755 --- a/lib/core/exceptions.py +++ b/lib/core/exceptions.py @@ -17,10 +17,18 @@ # Author: Mauro Soria +class CannotConnectException(Exception): + pass + + class FailedDependenciesInstallation(Exception): pass +class FileExistsException(Exception): + pass + + class InvalidRawRequest(Exception): pass diff --git a/lib/core/fuzzer.py b/lib/core/fuzzer.py index 29a1519fe..4d0045028 100755 --- a/lib/core/fuzzer.py +++ b/lib/core/fuzzer.py @@ -35,7 +35,7 @@ WILDCARD_TEST_POINT_MARKER, ) from lib.parse.url import clean_path -from lib.utils.common import human_size, lstrip_once +from lib.utils.common import get_readable_size, lstrip_once from lib.utils.crawl import Crawler @@ -95,13 +95,16 @@ def is_excluded(resp: BaseResponse) -> bool: ): return True - if resp.status in blacklists and any( - resp.path.endswith(lstrip_once(suffix, "/")) - for suffix in blacklists.get(resp.status) + if ( + resp.status in blacklists + and any( + resp.path.endswith(lstrip_once(suffix, "/")) + for suffix in blacklists.get(resp.status) + ) ): return True - if human_size(resp.length).rstrip() in options["exclude_sizes"]: + if get_readable_size(resp.length).rstrip() in options["exclude_sizes"]: return True if resp.length < options["minimum_response_size"]: @@ -113,14 +116,15 @@ def is_excluded(resp: BaseResponse) -> bool: if any(text in resp.content for text in options["exclude_texts"]): return True - if options["exclude_regex"] and re.search( 
- options["exclude_regex"], resp.content - ): + if options["exclude_regex"] and re.search(options["exclude_regex"], resp.content): return True - if options["exclude_redirect"] and ( - options["exclude_redirect"] in resp.redirect - or re.search(options["exclude_redirect"], resp.redirect) + if ( + options["exclude_redirect"] + and ( + options["exclude_redirect"] in resp.redirect + or re.search(options["exclude_redirect"], resp.redirect) + ) ): return True diff --git a/lib/core/options.py b/lib/core/options.py index 8bcdce5be..6b915257f 100755 --- a/lib/core/options.py +++ b/lib/core/options.py @@ -32,7 +32,7 @@ def parse_options(): - opt = parse_config(parse_arguments()) + opt = merge_config(parse_arguments()) if opt.session_file: if opt.async_mode: @@ -196,13 +196,33 @@ def parse_options(): ) exit(1) - if opt.output_format not in OUTPUT_FORMATS: - print( - "Select one of the following output formats: " - f"{', '.join(OUTPUT_FORMATS)}" + opt.output_formats = [format.strip() for format in opt.output_formats.split(",")] + invalid_formats = set(opt.output_formats).difference(OUTPUT_FORMATS) + + if invalid_formats: + print(f"Invalid output format(s): {', '.join(invalid_formats)}") + exit(1) + + # There are multiple file-based output formats but no variable to separate output files for different formats + if ( + opt.output_file + and "{format}" not in opt.output_file + and len(opt.output_formats) - ("mysql" in opt.output_formats) - ("postgresql" in opt.output_formats) > 1 + and ( + "{extension}" not in opt.output_file + # "plain" and "simple" have the same file extension (txt) + or {"plain", "simple"}.issubset(opt.output_formats) ) + ): + print("Found at least 2 output formats sharing the same output file, make sure you use '{format}' and '{extension}' variables in your output file") exit(1) + if opt.log_file: + opt.log_file = FileUtils.get_abs_path(opt.log_file) + + if opt.output_file: + opt.output_file = FileUtils.get_abs_path(opt.output_file) + return vars(opt) @@ 
-243,7 +263,7 @@ def _access_file(path): return fd -def parse_config(opt): +def merge_config(opt): config = ConfigParser() config.read(opt.config) @@ -316,8 +336,8 @@ def parse_config(opt): opt.suffixes = opt.suffixes or config.safe_get("dictionary", "suffixes", "") opt.lowercase = opt.lowercase or config.safe_getboolean("dictionary", "lowercase") opt.uppercase = opt.uppercase or config.safe_getboolean("dictionary", "uppercase") - opt.capitalization = opt.capitalization or config.safe_getboolean( - "dictionary", "capitalization" + opt.capital = opt.capital or config.safe_getboolean( + "dictionary", "capital" ) # Request @@ -364,12 +384,14 @@ def parse_config(opt): ) # Output - opt.output_path = config.safe_get("output", "autosave-report-folder") - opt.autosave_report = config.safe_getboolean("output", "autosave-report") - opt.log_file_size = config.safe_getint("output", "log-file-size") - opt.log_file = opt.log_file or config.safe_get("output", "log-file") - opt.output_format = opt.output_format or config.safe_get( - "output", "report-format", "plain", OUTPUT_FORMATS + opt.output_file = opt.output_file or config.safe_get("output", "output-file") + opt.mysql_url = opt.mysql_url or config.safe_get("output", "mysql-url") + opt.postgres_url = opt.postgres_url or config.safe_get("output", "postgres-url") + opt.output_table = config.safe_get("output", "output-sql-table") + opt.output_formats = opt.output_formats or config.safe_get( + "output", "output-formats", "plain" ) + opt.log_file = opt.log_file or config.safe_get("output", "log-file") + opt.log_file_size = config.safe_getint("output", "log-file-size") return opt diff --git a/lib/core/settings.py b/lib/core/settings.py index 05b43dafe..7d663b107 100755 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -19,6 +19,7 @@ import os import sys import string +import time from lib.utils.file import FileUtils @@ -30,6 +31,12 @@ (_||| _) (/_(_|| (_| ) """ +COMMAND = " ".join(sys.argv) + +START_TIME = 
time.strftime("%Y-%m-%d %H:%M:%S") + +START_DATETIME = time.strftime("%Y-%m-%d") + SCRIPT_PATH = FileUtils.parent(__file__, 3) OPTIONS_FILE = "options.ini" @@ -62,8 +69,6 @@ STANDARD_PORTS = {"http": 80, "https": 443} -INSECURE_CSV_CHARS = ("+", "-", "=", "@") - DEFAULT_TEST_PREFIXES = (".",) DEFAULT_TEST_SUFFIXES = ("/",) diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index a63554a54..e0908db58 100755 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -94,7 +94,7 @@ def parse_arguments(): "--extensions", action="store", dest="extensions", - help="Extension list separated by commas (e.g. php,asp)", + help="Extension list, separated by commas (e.g. php,asp)", ) dictionary.add_option( "-f", @@ -104,7 +104,6 @@ def parse_arguments(): help="Add extensions to the end of every wordlist entry. By default dirsearch only replaces the %EXT% keyword with extensions", ) dictionary.add_option( - "-O", "--overwrite-extensions", action="store_true", dest="overwrite_extensions", @@ -115,7 +114,7 @@ def parse_arguments(): action="store", dest="exclude_extensions", metavar="EXTENSIONS", - help="Exclude extension list separated by commas (e.g. asp,jsp)", + help="Exclude extension list, separated by commas (e.g. 
asp,jsp)", ) dictionary.add_option( "--remove-extensions", @@ -153,7 +152,7 @@ def parse_arguments(): "-C", "--capital", action="store_true", - dest="capitalization", + dest="capital", help="Capital wordlist", ) @@ -497,20 +496,35 @@ def parse_arguments(): # Output Settings output = OptionGroup(parser, "Output Settings") + output.add_option( + "-O", + "--output-formats", + action="store", + dest="output_formats", + metavar="FORMAT", + help=f"Report formats, separated by commas (Available: {', '.join(OUTPUT_FORMATS)})", + ) output.add_option( "-o", - "--output", + "--output-file", action="store", - dest="output", - metavar="PATH/URL", - help="Output file or MySQL/PostgreSQL URL (Format: scheme://[username:password@]host[:port]/database-name)", + dest="output_file", + metavar="PATH", + help="Output file location", ) output.add_option( - "--format", + "--mysql-url", action="store", - dest="output_format", - metavar="FORMAT", - help=f"Report format (Available: {', '.join(OUTPUT_FORMATS)})", + dest="mysql_url", + metavar="URL", + help="Database URL for MySQL output (Format: mysql://[username:password@]host[:port]/database-name)", + ) + output.add_option( + "--postgres-url", + action="store", + dest="postgres_url", + metavar="URL", + help="Database URL for PostgreSQL output (Format: postgres://[username:password@]host[:port]/database-name)", ) output.add_option( "--log", action="store", dest="log_file", metavar="PATH", help="Log file" diff --git a/lib/reports/__init__.py b/lib/report/__init__.py similarity index 100% rename from lib/reports/__init__.py rename to lib/report/__init__.py diff --git a/lib/report/csv_report.py b/lib/report/csv_report.py new file mode 100755 index 000000000..f3cf01398 --- /dev/null +++ b/lib/report/csv_report.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 
of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# Author: Mauro Soria + +from defusedcsv import csv + +from lib.core.decorators import locked +from lib.report.factory import BaseReport, FileReportMixin + + +class CSVReport(FileReportMixin, BaseReport): + __format__ = "csv" + __extension__ = "csv" + + def new(self): + return [["URL", "Status", "Size", "Content Type", "Redirection"]] + + def parse(self, file): + with open(file, newline="") as fh: + rows = list(csv.reader(fh, delimiter=",", quotechar='"')) + # Not a dirsearch CSV report + if rows[0] != self.new()[0]: + raise Exception + + return rows + + @locked + def save(self, file, result): + rows = self.parse(file) + rows.append([result.url, result.status, result.length, result.type, result.redirect]) + self.write(file, rows) + + def write(self, file, rows): + with open(file, "w", newline="") as fh: + writer = csv.writer(fh, delimiter=",", quotechar='"') + for row in rows: + writer.writerow(row) diff --git a/lib/report/factory.py b/lib/report/factory.py new file mode 100755 index 000000000..a0024582e --- /dev/null +++ b/lib/report/factory.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# Author: Mauro Soria + +from abc import ABC, abstractmethod + +from lib.core.decorators import locked +from lib.core.exceptions import CannotConnectException, FileExistsException +from lib.utils.file import FileUtils + + +class BaseReport(ABC): + @abstractmethod + def initiate(self): + raise NotImplementedError + + @abstractmethod + def save(self, result): + raise NotImplementedError + + +class FileReportMixin: + def initiate(self, file): + FileUtils.create_dir(FileUtils.parent(file)) + if FileUtils.exists(file) and not FileUtils.is_empty(file): + self.validate(file) + else: + self.write(file, self.new()) + + def validate(self, file): + try: + self.parse(file) + except Exception: + raise FileExistsException(f"Output file {file} already exists") + + def parse(self, file): + return open(file, "r").read() + + def write(self, file, data): + with open(file, "w") as fh: + fh.write(data) + + def finish(self): + pass + + +class SQLReportMixin: + # Cached connection, shared across calls when `_reuse` is set + _conn = None + + def get_connection(self, database): + # Reuse is disabled: open a fresh connection for every call + if not self._reuse: + return self.connect(database) + + if not self._conn: + self._conn = self.connect(database) + + return self._conn + + def get_drop_table_query(self, table): + return (f'''DROP TABLE IF EXISTS "{table}";''',) + + def get_create_table_query(self, table): + return (f'''CREATE TABLE "{table}" ( + time TIMESTAMP, + url TEXT, + status_code INTEGER, + content_length INTEGER, + content_type TEXT, + redirect TEXT + );''',) + + def 
get_insert_table_query(self, table, values): + return (f'''INSERT INTO "{table}" (time, url, status_code, content_length, content_type, redirect) + VALUES + (%s, %s, %s, %s, %s, %s);''', values) + + def initiate(self, database, table): + try: + conn = self.get_connection(database) + except Exception as e: + raise CannotConnectException(f"Cannot connect to the SQL database: {str(e)}") + + cursor = conn.cursor() + + cursor.execute(*self.get_drop_table_query(table)) + cursor.execute(*self.get_create_table_query(table)) + conn.commit() + + if not self._reuse: + conn.close() + + @locked + def save(self, database, table, result): + conn = self.get_connection(database) + cursor = conn.cursor() + + cursor.execute( + *self.get_insert_table_query( + table, + ( + result.datetime, + result.url, + result.status, + result.length, + result.type, + result.redirect, + ), + ) + ) + conn.commit() + + if not self._reuse: + conn.close() + + def finish(self): + if self._conn: + self._conn.close() diff --git a/lib/report/html_report.py b/lib/report/html_report.py new file mode 100755 index 000000000..bacd023f8 --- /dev/null +++ b/lib/report/html_report.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. 
+# +# Author: Mauro Soria + +import json +import os + +from jinja2 import Environment, FileSystemLoader + +from lib.core.decorators import locked +from lib.core.settings import COMMAND, START_TIME +from lib.report.factory import BaseReport, FileReportMixin + + +class HTMLReport(FileReportMixin, BaseReport): + __format__ = "html" + __extension__ = "html" + + def new(self): + return self.generate([]) + + def parse(self, file): + with open(file) as fh: + for line in fh: + # Gotta be the worst way to parse it but I don't know a better way:P + if line.startswith(" resources: "): + return json.loads(line[19:-2]) + raise ValueError("Not a dirsearch HTML report") + + @locked + def save(self, file, result): + results = self.parse(file) + results.append({ + "url": result.url, + "status": result.status, + "contentLength": result.length, + "contentType": result.type, + "redirect": result.redirect, + }) + self.write(file, self.generate(results)) + + def generate(self, results): + file_loader = FileSystemLoader( + os.path.dirname(os.path.realpath(__file__)) + "/templates/" + ) + env = Environment(loader=file_loader) + template = env.get_template("html_report_template.html") + return template.render( + metadata={"command": COMMAND, "date": START_TIME}, + results=results, + ) diff --git a/lib/report/json_report.py b/lib/report/json_report.py new file mode 100755 index 000000000..adf61aa68 --- /dev/null +++ b/lib/report/json_report.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# Author: Mauro Soria + +import json + +from lib.core.decorators import locked +from lib.core.settings import COMMAND, START_TIME +from lib.report.factory import BaseReport, FileReportMixin + + +class JSONReport(FileReportMixin, BaseReport): + __format__ = "json" + __extension__ = "json" + + def new(self): + return { + "info": {"args": COMMAND, "time": START_TIME}, + "results": [], + } + + def parse(self, file): + with open(file) as fh: + return json.load(fh) + + @locked + def save(self, file, result): + data = self.parse(file) + data["results"].append({ + "url": result.url, + "status": result.status, + "contentLength": result.length, + "contentType": result.type, + "redirect": result.redirect, + }) + self.write(file, data) + + def write(self, file, data): + with open(file, "w") as fh: + json.dump(data, fh, sort_keys=True, indent=4) diff --git a/lib/report/manager.py b/lib/report/manager.py new file mode 100755 index 000000000..536413a6b --- /dev/null +++ b/lib/report/manager.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. 
+#
+# Author: Mauro Soria
+
+from urllib.parse import urlparse
+
+from lib.core.data import options
+from lib.core.settings import STANDARD_PORTS, START_DATETIME
+from lib.report.csv_report import CSVReport
+from lib.report.html_report import HTMLReport
+from lib.report.json_report import JSONReport
+from lib.report.markdown_report import MarkdownReport
+from lib.report.mysql_report import MySQLReport
+from lib.report.plain_text_report import PlainTextReport
+from lib.report.postgresql_report import PostgreSQLReport
+from lib.report.simple_report import SimpleReport
+from lib.report.sqlite_report import SQLiteReport
+from lib.report.xml_report import XMLReport
+
+
+output_handlers = {
+    "simple": (SimpleReport, [options["output_file"]]),
+    "plain": (PlainTextReport, [options["output_file"]]),
+    "json": (JSONReport, [options["output_file"]]),
+    "xml": (XMLReport, [options["output_file"]]),
+    "md": (MarkdownReport, [options["output_file"]]),
+    "csv": (CSVReport, [options["output_file"]]),
+    "html": (HTMLReport, [options["output_file"]]),
+    "sqlite": (SQLiteReport, [options["output_file"], options["output_table"]]),
+    "mysql": (MySQLReport, [options["mysql_url"], options["output_table"]]),
+    "postgresql": (PostgreSQLReport, [options["postgres_url"], options["output_table"]]),
+}
+
+
+class ReportManager:
+    """Forward targets and results to every report handler selected by `formats`."""
+
+    def __init__(self, formats):
+        self.reports = []
+
+        for name in formats:
+            handler, sources = output_handlers[name]
+            # No output location provided
+            if not all(sources):
+                continue
+            self.reports.append((handler(), sources))
+
+    def prepare(self, target):
+        """Call initiate() on each report with its locations expanded for `target`."""
+        for reporter, sources in self.reports:
+            reporter.initiate(*self._resolve(sources, target, reporter))
+
+    def save(self, result):
+        """Call save() on each report with its expanded locations and `result`."""
+        for reporter, sources in self.reports:
+            reporter.save(*self._resolve(sources, result.url, reporter), result)
+
+    def finish(self):
+        """Call finish() on each report."""
+        for reporter, _ in self.reports:
+            reporter.finish()
+
+    def _resolve(self, sources, target, handler):
+        # Expand every output location of one handler before it is called
+        return [self.format(source, target, handler) for source in sources]
+
+    def format(self, string, target, handler):
+        """Expand the {date}/{host}/{scheme}/{port}/{format}/{extension} fields of `string`."""
+        parsed = urlparse(target)
+
+        return string.format(
+            # Get date from datetime string
+            date=START_DATETIME.split()[0],
+            host=parsed.hostname,
+            scheme=parsed.scheme,
+            port=parsed.port or STANDARD_PORTS[parsed.scheme],
+            format=handler.__format__,
+            extension=handler.__extension__,
+        )
diff --git a/lib/reports/markdown_report.py b/lib/report/markdown_report.py
similarity index 63%
rename from lib/reports/markdown_report.py
rename to lib/report/markdown_report.py
index 7075b886a..e2d5e715a 100755
--- a/lib/reports/markdown_report.py
+++ b/lib/report/markdown_report.py
@@ -16,28 +16,35 @@
 #
 # Author: Mauro Soria
 
-import time
-import sys
+from lib.core.decorators import locked
+from lib.core.settings import (
+    COMMAND,
+    NEW_LINE,
+    START_TIME,
+)
+from lib.report.factory import BaseReport, FileReportMixin
 
-from lib.core.settings import NEW_LINE
-from lib.reports.base import FileBaseReport
 
+class MarkdownReport(FileReportMixin, BaseReport):
+    """Report rendered as a Markdown table, one row per result."""
+
+    __format__ = "markdown"
+    __extension__ = "md"
 
-class MarkdownReport(FileBaseReport):
-    def get_header(self):
+    def new(self):
+        """Build the preamble: run information followed by the table header."""
         header = "### Information" + NEW_LINE
-        header += f"Command: {chr(32).join(sys.argv)}"
+        header += f"Command: {COMMAND}"
         header += NEW_LINE
-        header += f"Time: {time.ctime()}"
+        header += f"Time: {START_TIME}"
         header += NEW_LINE * 2
         header += "URL | Status | Size | Content Type | Redirection" + NEW_LINE
         header += "----|--------|------|--------------|------------" + NEW_LINE
         return header
 
-    def generate(self, entries):
-        output = self.get_header()
-
-        for entry in entries:
-            output += f"{entry.url} | {entry.status} | {entry.length} | {entry.type} | {entry.redirect}" + NEW_LINE
-
-        return output
+    @locked
+    def save(self, file, result):
+        """Append a table row for `result` to the content parsed from `file` and write it back."""
+        md = self.parse(file)
+        md += f"{result.url} | {result.status} | {result.length} | {result.type} | {result.redirect}" + NEW_LINE
+        self.write(file, md)
diff --git a/lib/reports/mysql_report.py
b/lib/report/mysql_report.py
similarity index 76%
rename from lib/reports/mysql_report.py
rename to lib/report/mysql_report.py
index 246b30e85..3f52f9221 100755
--- a/lib/reports/mysql_report.py
+++ b/lib/report/mysql_report.py
@@ -22,22 +22,32 @@
 from urllib.parse import urlparse
 
 from lib.core.exceptions import InvalidURLException
-from lib.reports.base import SQLBaseReport
+from lib.report.factory import BaseReport, SQLReportMixin
 
 
-class MySQLReport(SQLBaseReport):
-    def connect(self, url):
-        parsed = urlparse(url)
+class MySQLReport(SQLReportMixin, BaseReport):
+    """Report stored in a MySQL database addressed by a mysql:// URL."""
+
+    __format__ = "sql"
+    __extension__ = None
+    _reuse = True
+
+    def is_valid(self, url):  # only the mysql:// scheme is accepted
+        return url.startswith("mysql://")
 
-        if not parsed.scheme == "mysql":
+    def connect(self, url):
+        """Return a connection for `url`; raise InvalidURLException unless it is mysql://."""
+        if not self.is_valid(url):
             raise InvalidURLException("Provided MySQL URL does not start with mysql://")
 
-        self.conn = mysql.connector.connect(
+        parsed = urlparse(url)
+        conn = mysql.connector.connect(
             host=parsed.hostname,
             port=parsed.port or 3306,
             user=parsed.username,
             password=parsed.password,
             database=parsed.path.lstrip("/"),
         )
-        self.conn.sql_mode = [SQLMode.ANSI_QUOTES]
-        self.cursor = self.conn.cursor()
+        conn.sql_mode = [SQLMode.ANSI_QUOTES]
+
+        return conn
diff --git a/lib/reports/plain_text_report.py b/lib/report/plain_text_report.py
similarity index 50%
rename from lib/reports/plain_text_report.py
rename to lib/report/plain_text_report.py
index c078e1808..89105e181 100755
--- a/lib/reports/plain_text_report.py
+++ b/lib/report/plain_text_report.py
@@ -16,28 +16,36 @@
 #
 # Author: Mauro Soria
 
-import time
-import sys
+from lib.core.decorators import locked
+from lib.core.settings import (
+    COMMAND,
+    NEW_LINE,
+    START_TIME,
+)
+from lib.report.factory import BaseReport, FileReportMixin
+from lib.utils.common import get_readable_size
 
-from lib.core.settings import NEW_LINE
-from lib.reports.base import FileBaseReport
-from lib.utils.common import human_size
 
+class PlainTextReport(FileReportMixin, BaseReport):
+    """Report written as plain text, one line per result."""
+
+    __format__ = "plain"
+    __extension__ = "txt"
 
-class PlainTextReport(FileBaseReport):
-    def get_header(self):
-        return f"# Dirsearch started {time.ctime()} as: {chr(32).join(sys.argv)}" + NEW_LINE * 2
+    def new(self):
+        """Return the comment line naming the start time and the command, plus a blank line."""
+        return f"# Dirsearch started {START_TIME} as: {COMMAND}" + NEW_LINE * 2
 
-    def generate(self, entries):
-        output = self.get_header()
+    @locked
+    def save(self, file, result):
+        """Append a "status size url [-> redirect]" line for `result` and write it back."""
+        readable_size = get_readable_size(result.length)
+        data = self.parse(file)
+        data += f"{result.status} {readable_size.rjust(6, chr(32))} {result.url}"
 
-        for entry in entries:
-            readable_size = human_size(entry.length)
-            output += f"{entry.status} {readable_size.rjust(6, chr(32))} {entry.url}"
+        if result.redirect:
+            data += f" -> {result.redirect}"
 
-            if entry.redirect:
-                output += f" -> REDIRECTS TO: {entry.redirect}"
+        data += NEW_LINE
 
-            output += NEW_LINE
-
-        return output
+        self.write(file, data)
diff --git a/lib/reports/postgresql_report.py b/lib/report/postgresql_report.py
similarity index 73%
rename from lib/reports/postgresql_report.py
rename to lib/report/postgresql_report.py
index 68c67962c..bd9fd667b 100755
--- a/lib/reports/postgresql_report.py
+++ b/lib/report/postgresql_report.py
@@ -19,13 +19,22 @@
 import psycopg
 
 from lib.core.exceptions import InvalidURLException
-from lib.reports.base import SQLBaseReport
+from lib.report.factory import BaseReport, SQLReportMixin
 
 
-class PostgreSQLReport(SQLBaseReport):
+class PostgreSQLReport(SQLReportMixin, BaseReport):
+    """Report stored in a PostgreSQL database addressed by a connection URL."""
+
+    __format__ = "sql"
+    __extension__ = None
+    _reuse = True
+
+    def is_valid(self, url):  # both URL scheme spellings are accepted
+        return url.startswith(("postgres://", "postgresql://"))
+
     def connect(self, url):
+        """Return a connection for `url`; raise InvalidURLException for any other scheme."""
-        if not url.startswith("postgresql://"):
+        if not self.is_valid(url):
-            raise InvalidURLException("Provided PostgreSQL URL does not start with postgresql://")
+            raise InvalidURLException("Provided PostgreSQL URL does not start with postgres:// or postgresql://")
 
-        self.conn = psycopg.connect(url)
-        self.cursor = self.conn.cursor()
+        return psycopg.connect(url)
diff --git a/lib/reports/simple_report.py b/lib/report/simple_report.py
similarity
index 67%
rename from lib/reports/simple_report.py
rename to lib/report/simple_report.py
index d0bb00717..7401f7622 100755
--- a/lib/reports/simple_report.py
+++ b/lib/report/simple_report.py
@@ -16,10 +16,23 @@
 #
 # Author: Mauro Soria
 
+from lib.core.decorators import locked
 from lib.core.settings import NEW_LINE
-from lib.reports.base import FileBaseReport
+from lib.report.factory import BaseReport, FileReportMixin
 
 
-class SimpleReport(FileBaseReport):
-    def generate(self, entries):
-        return NEW_LINE.join(entry.url for entry in entries)
+class SimpleReport(FileReportMixin, BaseReport):
+    """Report that lists only the URL of each result, one per line."""
+
+    __format__ = "simple"
+    __extension__ = "txt"
+
+    def new(self):  # a new report starts out empty
+        return ""
+
+    @locked
+    def save(self, file, result):
+        """Append the URL of `result` to the content parsed from `file` and write it back."""
+        data = self.parse(file)
+        data += result.url + NEW_LINE
+        self.write(file, data)
diff --git a/lib/reports/sqlite_report.py b/lib/report/sqlite_report.py
similarity index 54%
rename from lib/reports/sqlite_report.py
rename to lib/report/sqlite_report.py
index 7c9d2c27b..59c311cb4 100755
--- a/lib/reports/sqlite_report.py
+++ b/lib/report/sqlite_report.py
@@ -18,17 +18,20 @@
 
 import sqlite3
 
-from lib.reports.base import SQLBaseReport
+from lib.report.factory import BaseReport, SQLReportMixin
+from lib.utils.file import FileUtils
 
 
-class SQLiteReport(SQLBaseReport):
-    def connect(self, output_file):
-        self.conn = sqlite3.connect(output_file, check_same_thread=False)
-        self.cursor = self.conn.cursor()
+class SQLiteReport(SQLReportMixin, BaseReport):
+    """Report stored in a table of a SQLite database file."""
+
+    __format__ = "sql"
+    __extension__ = "sqlite"
+    _reuse = False
 
-    def create_table_query(self, table):
+    def get_create_table_query(self, table):
         return (f'''CREATE TABLE "{table}" (
-            time DATETIME DEFAULT CURRENT_TIMESTAMP,
+            time DATETIME,
             url TEXT,
             status_code INTEGER,
             content_length INTEGER,
@@ -36,7 +39,20 @@ def create_table_query(self, table):
             redirect TEXT
         );''',)
 
-    def insert_table_query(self, table, values):
-        return (f'''INSERT INTO "{table}" (url, status_code, content_length, content_type, redirect)
-                    VALUES
-                        (?, ?, ?, ?, ?)''', values)
+    def get_insert_table_query(self, table, values):
+        # `table` is interpolated into the statement; only `values` are bound
+        return (f'INSERT INTO "{table}" VALUES (?, ?, ?, ?, ?, ?);', values)
+
+    def connect(self, file):
+        """Return a connection to `file`; raise Exception if the integrity check cannot run on it."""
+        FileUtils.create_dir(FileUtils.parent(file))
+        conn = sqlite3.connect(file, check_same_thread=False)
+        # Check if the file is a proper sqlite database
+        try:
+            conn.cursor().execute("PRAGMA integrity_check")
+        except sqlite3.DatabaseError as error:
+            # Release the handle of the rejected file before reporting it
+            conn.close()
+            raise Exception(f"{file} is not empty or is not a SQLite database") from error
+        else:
+            return conn
diff --git a/lib/reports/templates/html_report_template.html b/lib/report/templates/html_report_template.html
similarity index 84%
rename from lib/reports/templates/html_report_template.html
rename to lib/report/templates/html_report_template.html
index 94760f1ff..3241658b2 100644
--- a/lib/reports/templates/html_report_template.html
+++ b/lib/report/templates/html_report_template.html
@@ -39,6 +39,7 @@