diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8551da363..cda75213f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,7 +24,7 @@ jobs:
pip install codespell flake8 -r requirements.txt
- name: Test
run: |
- python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o tmp_report.json --format json --force-recursive -R 3 --full-url -q -O
+ python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o tmp_report.json --output-formats json --force-recursive -R 3 --full-url -q
python3 dirsearch.py -w ./tests/static/wordlist.txt -l ./tests/static/targets.txt --subdirs /,admin/ --exclude-extensions conf -q -L -f -i 200 --user-agent a --log tmp_log.log
python3 dirsearch.py -w ./tests/static/wordlist.txt --nmap-report ./tests/static/nmap.xml --max-rate 2 -H K:V --random-agent --overwrite-extensions --no-color
python3 dirsearch.py -w ./tests/static/wordlist.txt --raw ./tests/static/raw.txt --prefixes . --suffixes ~ --skip-on-status 404 -m POST -d test=1 --crawl --min-response-size 9
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4677bafa5..604665333 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,10 @@
# Changelog
## [Unreleased]
+- Ability to use multiple output formats
+- MySQL and PostgreSQL report formats
+- Support variables in file path and SQL table name for saving results
- Support non-default network interface
-- Remove unused dependencies (urllib3, cryptography, cffi, idna, chardet)
- Load targets from a Nmap XML report
- Added --async option to enable asynchronous mode (use coroutines instead of threads)
diff --git a/config.ini b/config.ini
index 7a005a2ae..dc266ee5f 100644
--- a/config.ini
+++ b/config.ini
@@ -14,18 +14,18 @@ exclude-subdirs = %%ff/,.;/,..;/,;/,./,../,%%2e/,%%2e%%2e/
random-user-agents = False
max-time = 0
exit-on-error = False
-# subdirs = /,api/
-# include-status = 200-299,401
-# exclude-status = 400,500-999
-# exclude-sizes = 0b,123gb
-# exclude-texts = [
-# "Not found",
-# "404"
-# ]
-# exclude-regex = "^403$"
-# exclude-redirect = "*/error.html"
-# exclude-response = 404.html
-# skip-on-status = 429,999
+#subdirs = /,api/
+#include-status = 200-299,401
+#exclude-status = 400,500-999
+#exclude-sizes = 0b,123gb
+#exclude-texts = [
+# "Not found",
+# "404"
+#]
+#exclude-regex = "^403$"
+#exclude-redirect = "*/error.html"
+#exclude-response = 404.html
+#skip-on-status = 429,999
[dictionary]
default-extensions = php,aspx,jsp,html,js
@@ -33,34 +33,34 @@ force-extensions = False
overwrite-extensions = False
lowercase = False
uppercase = False
-capitalization = False
-# exclude-extensions = old,log
-# prefixes = .,admin
-# suffixes = ~,.bak
-# wordlists = /path/to/wordlist1.txt,/path/to/wordlist2.txt
+capital = False
+#exclude-extensions = old,log
+#prefixes = .,admin
+#suffixes = ~,.bak
+#wordlists = /path/to/wordlist1.txt,/path/to/wordlist2.txt
[request]
http-method = get
follow-redirects = False
-# headers = [
-# "Header1: Value",
-# "Header2: Value"
-# ]
-# headers-file = /path/to/headers.txt
-# user-agent = MyUserAgent
-# cookie = SESSIONID=123
+#headers = [
+# "Header1: Value",
+# "Header2: Value"
+#]
+#headers-file = /path/to/headers.txt
+#user-agent = MyUserAgent
+#cookie = SESSIONID=123
[connection]
timeout = 7.5
delay = 0
max-rate = 0
max-retries = 1
-## By disabling `scheme` variable, dirsearch will automatically identify the URI scheme
-# scheme = http
-# proxies = ["localhost:8080"]
-# proxies-file = /path/to/proxies.txt
-# replay-proxy = localhost:8000
-# network-interface = eth0
+# By disabling `scheme` variable, dirsearch will automatically identify the URI scheme
+#scheme = http
+#proxies = ["localhost:8080"]
+#proxies-file = /path/to/proxies.txt
+#replay-proxy = localhost:8000
+#network-interface = eth0
[advanced]
crawl = False
@@ -72,9 +72,21 @@ color = True
show-redirects-history = False
[output]
-## Support: plain, simple, json, xml, md, csv, html, sqlite, mysql, postgresql
-report-format = plain
-autosave-report = True
-autosave-report-folder = reports/
-# log-file = /path/to/dirsearch.log
-# log-file-size = 50000000
+# Available: simple, plain, json, xml, md, csv, html, sqlite, mysql, postgresql
+output-formats = plain
+# Supported variables for 'output-file' and 'output-sql-table':
+# - {extension}: File extension of the report, for 'output-file' only (e.g. txt, json)
+# - {format}: Output format (e.g. plain, simple, xml)
+# - {host}: Target hostname or IP (e.g. example.com)
+# - {scheme}: URI scheme (http or https)
+# - {port}: Port number (e.g. 443)
+# - {date}: Scan date, format: DD-MM-YYYY (e.g. 07-10-2022)
+#
+# For output formats other than PostgreSQL and MySQL
+#output-file = reports/{host}/{scheme}_{port}.{extension}
+#mysql-url = mysql://user:password@localhost/database
+#postgres-url = postgres://user:password@localhost/database
+# Table to be used for SQL output
+output-sql-table = {scheme}_{host}:{port}
+#log-file = /path/to/dirsearch.log
+#log-file-size = 50000000
diff --git a/lib/connection/response.py b/lib/connection/response.py
index b9a56fbc9..14937cea5 100755
--- a/lib/connection/response.py
+++ b/lib/connection/response.py
@@ -16,6 +16,7 @@
#
# Author: Mauro Soria
+import time
import httpx
from lib.core.settings import (
@@ -25,11 +26,12 @@
UNKNOWN,
)
from lib.parse.url import clean_path, parse_path
-from lib.utils.common import is_binary
+from lib.utils.common import get_readable_size, is_binary
class BaseResponse:
def __init__(self, response):
+ self.datetime = time.strftime("%Y-%m-%d %H:%M:%S")
self.url = str(response.url)
self.full_path = parse_path(self.url)
self.path = clean_path(self.full_path)
@@ -54,6 +56,10 @@ def length(self):
except TypeError:
return len(self.body)
+ @property
+ def size(self):
+ return get_readable_size(self.length)
+
def __hash__(self):
return hash(self.body)
diff --git a/lib/controller/controller.py b/lib/controller/controller.py
index 8abae1871..435598757 100755
--- a/lib/controller/controller.py
+++ b/lib/controller/controller.py
@@ -32,6 +32,8 @@
from lib.core.decorators import locked
from lib.core.dictionary import Dictionary, get_blacklists
from lib.core.exceptions import (
+ CannotConnectException,
+ FileExistsException,
InvalidRawRequest,
InvalidURLException,
RequestException,
@@ -47,23 +49,13 @@
EXTENSION_RECOGNITION_REGEX,
MAX_CONSECUTIVE_REQUEST_ERRORS,
NEW_LINE,
- SCRIPT_PATH,
STANDARD_PORTS,
UNKNOWN,
)
from lib.parse.rawrequest import parse_raw
from lib.parse.url import clean_path, parse_path
-from lib.reports.csv_report import CSVReport
-from lib.reports.html_report import HTMLReport
-from lib.reports.json_report import JSONReport
-from lib.reports.markdown_report import MarkdownReport
-from lib.reports.mysql_report import MySQLReport
-from lib.reports.plain_text_report import PlainTextReport
-from lib.reports.postgresql_report import PostgreSQLReport
-from lib.reports.simple_report import SimpleReport
-from lib.reports.sqlite_report import SQLiteReport
-from lib.reports.xml_report import XMLReport
-from lib.utils.common import get_valid_filename, lstrip_once
+from lib.report.manager import ReportManager
+from lib.utils.common import lstrip_once
from lib.utils.file import FileUtils
from lib.utils.pickle import pickle, unpickle
from lib.utils.schemedet import detect_scheme
@@ -143,12 +135,9 @@ def setup(self):
self.requester = Requester()
self.dictionary = Dictionary(files=options["wordlists"])
- self.results = []
self.start_time = time.time()
self.passed_urls = set()
self.directories = []
- self.report = None
- self.batch = False
self.jobs_processed = 0
self.errors = 0
self.consecutive_errors = 0
@@ -164,8 +153,6 @@ def setup(self):
self.requester.set_proxy_auth(options["proxy_auth"])
if options["log_file"]:
- options["log_file"] = FileUtils.get_abs_path(options["log_file"])
-
try:
FileUtils.create_dir(FileUtils.parent(options["log_file"]))
if not FileUtils.can_write(options["log_file"]):
@@ -179,27 +166,11 @@ def setup(self):
)
exit(1)
- if options["autosave_report"] and not options["output"]:
- self.report_path = options["output_path"] or FileUtils.build_path(
- SCRIPT_PATH, "reports"
- )
-
- try:
- FileUtils.create_dir(self.report_path)
- if not FileUtils.can_write(self.report_path):
- raise Exception
-
- except Exception:
- interface.error(
- f"Couldn't create report folder at {self.report_path}"
- )
- exit(1)
-
interface.header(BANNER)
interface.config(len(self.dictionary))
try:
- self.setup_reports()
+ self.reporter = ReportManager(options["output_formats"])
except (
InvalidURLException,
mysql.connector.Error,
@@ -219,7 +190,7 @@ def run(self):
# error_callbacks callback values:
# - *args[0]: exception
match_callbacks = (
- self.match_callback, self.reset_consecutive_errors
+ self.match_callback, self.reporter.save, self.reset_consecutive_errors
)
not_found_callbacks = (
self.update_progress_bar, self.reset_consecutive_errors
@@ -246,9 +217,12 @@ def run(self):
if not self.old_session:
interface.target(self.url)
+ self.reporter.prepare(self.url)
self.start()
except (
+ CannotConnectException,
+ FileExistsException,
InvalidURLException,
RequestException,
SkipTargetInterrupt,
@@ -261,6 +235,7 @@ def run(self):
interface.error(str(e))
except QuitInterrupt as e:
+ self.reporter.finish()
interface.error(e.args[0])
exit(0)
@@ -268,6 +243,7 @@ def run(self):
options["urls"].pop(0)
interface.warning("\nTask Completed")
+ self.reporter.finish()
if options["session_file"]:
try:
@@ -385,95 +361,6 @@ def set_target(self, url):
self.requester.set_url(self.url)
- def setup_batch_reports(self):
- """Create batch report folder"""
-
- self.batch = True
- current_time = time.strftime("%y-%m-%d_%H-%M-%S")
- batch_session = f"BATCH-{current_time}"
- batch_directory_path = FileUtils.build_path(self.report_path, batch_session)
-
- try:
- FileUtils.create_dir(batch_directory_path)
- except Exception:
- interface.error(f"Couldn't create batch folder at {batch_directory_path}")
- exit(1)
-
- return batch_directory_path
-
- def get_output_extension(self):
- if options["output_format"] in ("plain", "simple"):
- return "txt"
-
- return options["output_format"]
-
- def setup_reports(self):
- """Create report file"""
-
- output = options["output"]
-
- if options["autosave_report"] and not output and options["output_format"] not in ("mysql", "postgresql"):
- if len(options["urls"]) > 1:
- directory_path = self.setup_batch_reports()
- filename = "BATCH." + self.get_output_extension()
- else:
- self.set_target(options["urls"][0])
-
- parsed = urlparse(self.url)
-
- if not parsed.netloc:
- parsed = urlparse(f"//{options['urls'][0]}")
-
- filename = get_valid_filename(f"{parsed.path}_")
- filename += time.strftime("%y-%m-%d_%H-%M-%S")
- filename += f".{self.get_output_extension()}"
- directory_path = FileUtils.build_path(
- self.report_path, get_valid_filename(f"{parsed.scheme}_{parsed.netloc}")
- )
-
- output = FileUtils.get_abs_path((FileUtils.build_path(directory_path, filename)))
-
- if FileUtils.exists(output):
- i = 2
- while FileUtils.exists(f"{output}_{i}"):
- i += 1
-
- output += f"_{i}"
-
- try:
- FileUtils.create_dir(directory_path)
- except Exception:
- interface.error(
- f"Couldn't create the reports folder at {directory_path}"
- )
- exit(1)
-
- if not output:
- return
-
- if options["output_format"] == "plain":
- self.report = PlainTextReport(output)
- elif options["output_format"] == "json":
- self.report = JSONReport(output)
- elif options["output_format"] == "xml":
- self.report = XMLReport(output)
- elif options["output_format"] == "md":
- self.report = MarkdownReport(output)
- elif options["output_format"] == "csv":
- self.report = CSVReport(output)
- elif options["output_format"] == "html":
- self.report = HTMLReport(output)
- elif options["output_format"] == "sqlite":
- self.report = SQLiteReport(output)
- elif options["output_format"] == "mysql":
- self.report = MySQLReport(output)
- elif options["output_format"] == "postgresql":
- self.report = PostgreSQLReport(output)
- else:
- self.report = SimpleReport(output)
-
- interface.output_location(output)
-
def reset_consecutive_errors(self, response):
self.consecutive_errors = 0
@@ -511,10 +398,6 @@ def match_callback(self, response):
else:
self.requester.request(response.full_path, proxy=options["replay_proxy"])
- if self.report:
- self.results.append(response)
- self.report.save(self.results)
-
def update_progress_bar(self, response):
jobs_count = (
# Jobs left for unscanned targets
diff --git a/lib/core/exceptions.py b/lib/core/exceptions.py
index 69b481e8e..6a3940a07 100755
--- a/lib/core/exceptions.py
+++ b/lib/core/exceptions.py
@@ -17,10 +17,18 @@
# Author: Mauro Soria
+class CannotConnectException(Exception):
+    """Raised when a connection to the SQL database of a report cannot be obtained."""
+
+
class FailedDependenciesInstallation(Exception):
pass
+class FileExistsException(Exception):
+    """Raised when an existing, non-empty output file cannot be parsed as a report."""
+
+
class InvalidRawRequest(Exception):
pass
diff --git a/lib/core/fuzzer.py b/lib/core/fuzzer.py
index 29a1519fe..4d0045028 100755
--- a/lib/core/fuzzer.py
+++ b/lib/core/fuzzer.py
@@ -35,7 +35,7 @@
WILDCARD_TEST_POINT_MARKER,
)
from lib.parse.url import clean_path
-from lib.utils.common import human_size, lstrip_once
+from lib.utils.common import get_readable_size, lstrip_once
from lib.utils.crawl import Crawler
@@ -95,13 +95,16 @@ def is_excluded(resp: BaseResponse) -> bool:
):
return True
- if resp.status in blacklists and any(
- resp.path.endswith(lstrip_once(suffix, "/"))
- for suffix in blacklists.get(resp.status)
+ if (
+ resp.status in blacklists
+ and any(
+ resp.path.endswith(lstrip_once(suffix, "/"))
+ for suffix in blacklists.get(resp.status)
+ )
):
return True
- if human_size(resp.length).rstrip() in options["exclude_sizes"]:
+ if get_readable_size(resp.length).rstrip() in options["exclude_sizes"]:
return True
if resp.length < options["minimum_response_size"]:
@@ -113,14 +116,15 @@ def is_excluded(resp: BaseResponse) -> bool:
if any(text in resp.content for text in options["exclude_texts"]):
return True
- if options["exclude_regex"] and re.search(
- options["exclude_regex"], resp.content
- ):
+ if options["exclude_regex"] and re.search(options["exclude_regex"], resp.content):
return True
- if options["exclude_redirect"] and (
- options["exclude_redirect"] in resp.redirect
- or re.search(options["exclude_redirect"], resp.redirect)
+ if (
+ options["exclude_redirect"]
+ and (
+ options["exclude_redirect"] in resp.redirect
+ or re.search(options["exclude_redirect"], resp.redirect)
+ )
):
return True
diff --git a/lib/core/options.py b/lib/core/options.py
index 8bcdce5be..6b915257f 100755
--- a/lib/core/options.py
+++ b/lib/core/options.py
@@ -32,7 +32,7 @@
def parse_options():
- opt = parse_config(parse_arguments())
+ opt = merge_config(parse_arguments())
if opt.session_file:
if opt.async_mode:
@@ -196,13 +196,33 @@ def parse_options():
)
exit(1)
- if opt.output_format not in OUTPUT_FORMATS:
- print(
- "Select one of the following output formats: "
- f"{', '.join(OUTPUT_FORMATS)}"
+ opt.output_formats = [format.strip() for format in opt.output_formats.split(",")]
+ invalid_formats = set(opt.output_formats).difference(OUTPUT_FORMATS)
+
+ if invalid_formats:
+ print(f"Invalid output format(s): {', '.join(invalid_formats)}")
+ exit(1)
+
+ # There are multiple file-based output formats but no variable to separate output files for different formats
+ if (
+ opt.output_file
+ and "{format}" not in opt.output_file
+ and len(opt.output_formats) - ("mysql" in opt.output_formats) - ("postgresql" in opt.output_formats) > 1
+ and (
+ "{extension}" not in opt.output_file
+ # "plain" and "simple" have the same file extension (txt)
+ or {"plain", "simple"}.issubset(opt.output_formats)
)
+ ):
+ print("Found at least 2 output formats sharing the same output file, make sure you use '{format}' and '{extension}' variables in your output file")
exit(1)
+ if opt.log_file:
+ opt.log_file = FileUtils.get_abs_path(opt.log_file)
+
+ if opt.output_file:
+ opt.output_file = FileUtils.get_abs_path(opt.output_file)
+
return vars(opt)
@@ -243,7 +263,7 @@ def _access_file(path):
return fd
-def parse_config(opt):
+def merge_config(opt):
config = ConfigParser()
config.read(opt.config)
@@ -316,8 +336,8 @@ def parse_config(opt):
opt.suffixes = opt.suffixes or config.safe_get("dictionary", "suffixes", "")
opt.lowercase = opt.lowercase or config.safe_getboolean("dictionary", "lowercase")
opt.uppercase = opt.uppercase or config.safe_getboolean("dictionary", "uppercase")
- opt.capitalization = opt.capitalization or config.safe_getboolean(
- "dictionary", "capitalization"
+ opt.capital = opt.capital or config.safe_getboolean(
+ "dictionary", "capital"
)
# Request
@@ -364,12 +384,14 @@ def parse_config(opt):
)
# Output
- opt.output_path = config.safe_get("output", "autosave-report-folder")
- opt.autosave_report = config.safe_getboolean("output", "autosave-report")
- opt.log_file_size = config.safe_getint("output", "log-file-size")
- opt.log_file = opt.log_file or config.safe_get("output", "log-file")
- opt.output_format = opt.output_format or config.safe_get(
- "output", "report-format", "plain", OUTPUT_FORMATS
+ opt.output_file = opt.output_file or config.safe_get("output", "output-file")
+ opt.mysql_url = opt.mysql_url or config.safe_get("output", "mysql-url")
+ opt.postgres_url = opt.postgres_url or config.safe_get("output", "postgres-url")
+ opt.output_table = config.safe_get("output", "output-sql-table")
+ opt.output_formats = opt.output_formats or config.safe_get(
+ "output", "output-formats", "plain"  # key spelled as in config.ini, otherwise the setting is never read
)
+ opt.log_file = opt.log_file or config.safe_get("output", "log-file")
+ opt.log_file_size = config.safe_getint("output", "log-file-size")
return opt
diff --git a/lib/core/settings.py b/lib/core/settings.py
index 05b43dafe..7d663b107 100755
--- a/lib/core/settings.py
+++ b/lib/core/settings.py
@@ -19,6 +19,7 @@
import os
import sys
import string
+import time
from lib.utils.file import FileUtils
@@ -30,6 +31,12 @@
(_||| _) (/_(_|| (_| )
"""
+COMMAND = " ".join(sys.argv)  # sys.argv of this run joined by single spaces
+
+START_TIME = time.strftime("%Y-%m-%d %H:%M:%S")  # date and time, evaluated once when this module is imported
+
+START_DATETIME = time.strftime("%Y-%m-%d")  # date only, despite the name (NOTE(review): config.ini documents {date} as DD-MM-YYYY - confirm which format is intended)
+
SCRIPT_PATH = FileUtils.parent(__file__, 3)
OPTIONS_FILE = "options.ini"
@@ -62,8 +69,6 @@
STANDARD_PORTS = {"http": 80, "https": 443}
-INSECURE_CSV_CHARS = ("+", "-", "=", "@")
-
DEFAULT_TEST_PREFIXES = (".",)
DEFAULT_TEST_SUFFIXES = ("/",)
diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py
index a63554a54..e0908db58 100755
--- a/lib/parse/cmdline.py
+++ b/lib/parse/cmdline.py
@@ -94,7 +94,7 @@ def parse_arguments():
"--extensions",
action="store",
dest="extensions",
- help="Extension list separated by commas (e.g. php,asp)",
+ help="Extension list, separated by commas (e.g. php,asp)",
)
dictionary.add_option(
"-f",
@@ -104,7 +104,6 @@ def parse_arguments():
help="Add extensions to the end of every wordlist entry. By default dirsearch only replaces the %EXT% keyword with extensions",
)
dictionary.add_option(
- "-O",
"--overwrite-extensions",
action="store_true",
dest="overwrite_extensions",
@@ -115,7 +114,7 @@ def parse_arguments():
action="store",
dest="exclude_extensions",
metavar="EXTENSIONS",
- help="Exclude extension list separated by commas (e.g. asp,jsp)",
+ help="Exclude extension list, separated by commas (e.g. asp,jsp)",
)
dictionary.add_option(
"--remove-extensions",
@@ -153,7 +152,7 @@ def parse_arguments():
"-C",
"--capital",
action="store_true",
- dest="capitalization",
+ dest="capital",
help="Capital wordlist",
)
@@ -497,20 +496,35 @@ def parse_arguments():
# Output Settings
output = OptionGroup(parser, "Output Settings")
+ output.add_option(
+ "-O",
+ "--output-formats",
+ action="store",
+ dest="output_formats",
+ metavar="FORMAT",
+ help=f"Report formats, separated by commas (Available: {', '.join(OUTPUT_FORMATS)})",
+ )
output.add_option(
"-o",
- "--output",
+ "--output-file",
action="store",
- dest="output",
- metavar="PATH/URL",
- help="Output file or MySQL/PostgreSQL URL (Format: scheme://[username:password@]host[:port]/database-name)",
+ dest="output_file",
+ metavar="PATH",
+ help="Output file location",
)
output.add_option(
- "--format",
+ "--mysql-url",
action="store",
- dest="output_format",
- metavar="FORMAT",
- help=f"Report format (Available: {', '.join(OUTPUT_FORMATS)})",
+ dest="mysql_url",
+ metavar="URL",
+ help="Database URL for MySQL output (Format: mysql://[username:password@]host[:port]/database-name)",
+ )
+ output.add_option(
+ "--postgres-url",
+ action="store",
+ dest="postgres_url",
+ metavar="URL",
+ help="Database URL for PostgreSQL output (Format: postgres://[username:password@]host[:port]/database-name)",
)
output.add_option(
"--log", action="store", dest="log_file", metavar="PATH", help="Log file"
diff --git a/lib/reports/__init__.py b/lib/report/__init__.py
similarity index 100%
rename from lib/reports/__init__.py
rename to lib/report/__init__.py
diff --git a/lib/report/csv_report.py b/lib/report/csv_report.py
new file mode 100755
index 000000000..f3cf01398
--- /dev/null
+++ b/lib/report/csv_report.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+#
+# Author: Mauro Soria
+
+from defusedcsv import csv
+
+from lib.core.decorators import locked
+from lib.report.factory import BaseReport, FileReportMixin
+
+
+class CSVReport(FileReportMixin, BaseReport):
+    """Report that stores one result per CSV row, below a fixed header row."""
+
+    __format__ = "csv"
+    __extension__ = "csv"
+
+    def new(self):
+        """Return the rows of an empty report: only the header row."""
+        return [["URL", "Status", "Size", "Content Type", "Redirection"]]
+
+    def parse(self, file):
+        """Read `file` and return its rows as a list of lists.
+
+        Raises ValueError when the first row is not the header produced by
+        new(), i.e. the file is not a dirsearch CSV report.
+        """
+        # newline="" leaves line-ending handling to the csv module
+        with open(file, newline="") as fh:
+            rows = list(csv.reader(fh, delimiter=",", quotechar='"'))
+
+        if not rows or rows[0] != self.new()[0]:
+            raise ValueError(f"{file} is not a dirsearch CSV report")
+
+        return rows
+
+    @locked
+    def save(self, file, result):
+        """Append `result` as a new row by re-reading and rewriting `file`."""
+        rows = self.parse(file)
+        rows.append([result.url, result.status, result.length, result.type, result.redirect])
+        self.write(file, rows)
+
+    def write(self, file, rows):
+        """Overwrite `file` with `rows`."""
+        # Without newline="" the writer's "\r\n" row terminator is translated
+        # once more on Windows, which leaves a blank line after every row
+        with open(file, "w", newline="") as fh:
+            writer = csv.writer(fh, delimiter=",", quotechar='"')
+            for row in rows:
+                writer.writerow(row)
diff --git a/lib/report/factory.py b/lib/report/factory.py
new file mode 100755
index 000000000..a0024582e
--- /dev/null
+++ b/lib/report/factory.py
@@ -0,0 +1,170 @@
+# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+#
+# Author: Mauro Soria
+
+from abc import ABC, abstractmethod
+
+from lib.core.decorators import locked
+from lib.core.exceptions import CannotConnectException, FileExistsException
+from lib.utils.file import FileUtils
+
+
+class BaseReport(ABC):
+    """Interface shared by all report classes."""
+
+    @abstractmethod
+    def initiate(self):
+        """Prepare the report destination before results are saved."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def save(self, result):
+        """Store a single result."""
+        raise NotImplementedError
+
+
+class FileReportMixin:
+    """File-backed implementation of initiate() and its helpers.
+
+    Calls self.new(), which is not defined here, to obtain the content of
+    an empty report.
+    """
+
+    def initiate(self, file):
+        """Create `file` with an empty report, or validate it if it has content."""
+        FileUtils.create_dir(FileUtils.parent(file))
+        if FileUtils.exists(file) and not FileUtils.is_empty(file):
+            self.validate(file)
+        else:
+            self.write(file, self.new())
+
+    def validate(self, file):
+        """Raise FileExistsException when parse() fails on `file`."""
+        try:
+            self.parse(file)
+        except Exception as e:
+            raise FileExistsException(f"Output file {file} already exists") from e
+
+    def parse(self, file):
+        """Return the text content of `file`."""
+        # Close the handle right away instead of leaving it to the garbage collector
+        with open(file, "r") as fh:
+            return fh.read()
+
+    def write(self, file, data):
+        """Overwrite `file` with the string `data`."""
+        with open(file, "w") as fh:
+            fh.write(data)
+
+    def finish(self):
+        """Do nothing: this mixin keeps no file handle open between calls."""
+
+
+class SQLReportMixin:
+    """SQL-backed implementation of initiate(), save() and finish().
+
+    Relies on self.connect(database) and the self._reuse flag, neither of
+    which is defined here.
+    """
+
+    # Connection cached by get_connection() when `_reuse` is true
+    _conn = None
+
+    def get_connection(self, database):
+        """Return a connection to `database`.
+
+        Opens a new connection on every call when `_reuse` is false; otherwise
+        opens one on first use and returns that same connection afterwards.
+        """
+        if not self._reuse:
+            return self.connect(database)
+
+        if not self._conn:
+            self._conn = self.connect(database)
+
+        return self._conn
+
+    # NOTE(review): `table` is interpolated into the statements below as a quoted
+    # identifier; a name containing a double quote would end the identifier early
+    def get_drop_table_query(self, table):
+        """Return cursor.execute() arguments that drop `table` if it exists."""
+        return (f'''DROP TABLE IF EXISTS "{table}";''',)
+
+    def get_create_table_query(self, table):
+        """Return cursor.execute() arguments that create `table`."""
+        return (f'''CREATE TABLE "{table}" (
+ time TIMESTAMP,
+ url TEXT,
+ status_code INTEGER,
+ content_length INTEGER,
+ content_type TEXT,
+ redirect TEXT
+ );''',)
+
+    def get_insert_table_query(self, table, values):
+        """Return cursor.execute() arguments that insert `values` into `table`."""
+        return (f'''INSERT INTO "{table}" (time, url, status_code, content_length, content_type, redirect)
+ VALUES
+ (%s, %s, %s, %s, %s, %s);''', values)
+
+    def initiate(self, database, table):
+        """Recreate `table` in `database`, dropping any existing one.
+
+        Raises CannotConnectException when the connection cannot be obtained.
+        """
+        try:
+            conn = self.get_connection(database)
+        except Exception as e:
+            raise CannotConnectException(f"Cannot connect to the SQL database: {str(e)}") from e
+
+        cursor = conn.cursor()
+
+        cursor.execute(*self.get_drop_table_query(table))
+        cursor.execute(*self.get_create_table_query(table))
+        conn.commit()
+
+        if not self._reuse:
+            conn.close()
+
+    @locked
+    def save(self, database, table, result):
+        """Insert `result` as one row of `table` and commit."""
+        conn = self.get_connection(database)
+        cursor = conn.cursor()
+
+        cursor.execute(
+            *self.get_insert_table_query(
+                table,
+                (
+                    result.datetime,
+                    result.url,
+                    result.status,
+                    result.length,
+                    result.type,
+                    result.redirect,
+                ),
+            )
+        )
+        conn.commit()
+
+        if not self._reuse:
+            conn.close()
+
+    def finish(self):
+        """Close the cached connection, if there is one."""
+        if self._conn:
+            self._conn.close()
diff --git a/lib/report/html_report.py b/lib/report/html_report.py
new file mode 100755
index 000000000..bacd023f8
--- /dev/null
+++ b/lib/report/html_report.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+#
+# Author: Mauro Soria
+
+import json
+import os
+
+from jinja2 import Environment, FileSystemLoader
+
+from lib.core.decorators import locked
+from lib.core.settings import COMMAND, START_TIME
+from lib.report.factory import BaseReport, FileReportMixin
+
+
+class HTMLReport(FileReportMixin, BaseReport):
+    """Report rendered from an HTML template that embeds the results as JSON."""
+
+    __format__ = "html"
+    __extension__ = "html"
+
+    def new(self):
+        """Return the rendered page of a report that has no results yet."""
+        return self.generate([])
+
+    def parse(self, file):
+        """Return the list of results embedded in the report `file`.
+
+        Raises ValueError when no line starts with the resources marker, i.e.
+        the file is not a dirsearch HTML report.
+        """
+        with open(file) as fh:
+            # Iterating the file stops at EOF; a readline() loop never would,
+            # because readline() keeps returning "" once the file is exhausted
+            for line in fh:
+                # NOTE(review): the slice assumes the JSON sits between the line's first
+                # 19 and last 2 characters - confirm against html_report_template.html
+                if line.startswith(" resources: "):
+                    return json.loads(line[19:-2])
+
+        raise ValueError(f"{file} is not a dirsearch HTML report")
+
+    @locked
+    def save(self, file, result):
+        """Append `result` to the embedded results and re-render `file`."""
+        results = self.parse(file)
+        results.append({
+            "url": result.url,
+            "status": result.status,
+            "contentLength": result.length,
+            "contentType": result.type,
+            "redirect": result.redirect,
+        })
+        self.write(file, self.generate(results))
+
+    def generate(self, results):
+        """Render html_report_template.html with the run metadata and `results`."""
+        file_loader = FileSystemLoader(
+            os.path.dirname(os.path.realpath(__file__)) + "/templates/"
+        )
+        env = Environment(loader=file_loader)
+        template = env.get_template("html_report_template.html")
+        return template.render(
+            metadata={"command": COMMAND, "date": START_TIME},
+            results=results,
+        )
diff --git a/lib/report/json_report.py b/lib/report/json_report.py
new file mode 100755
index 000000000..adf61aa68
--- /dev/null
+++ b/lib/report/json_report.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+#
+# Author: Mauro Soria
+
+import json
+
+from lib.core.decorators import locked
+from lib.core.settings import COMMAND, START_TIME
+from lib.report.factory import BaseReport, FileReportMixin
+
+
+class JSONReport(FileReportMixin, BaseReport):
+    """Report that stores the run information and results in one JSON document."""
+
+    __format__ = "json"
+    __extension__ = "json"
+
+    def new(self):
+        """Return the document of a report that has no results yet."""
+        return {
+            "info": {"args": COMMAND, "time": START_TIME},
+            "results": [],
+        }
+
+    def parse(self, file):
+        """Load and return the report document stored in `file`.
+
+        Raises ValueError when the content is not valid JSON or lacks the
+        "results" list that save() appends to.
+        """
+        with open(file) as fh:
+            data = json.load(fh)
+
+        # Any JSON value loads without error; reject what save() could not extend
+        if not isinstance(data, dict) or not isinstance(data.get("results"), list):
+            raise ValueError(f"{file} is not a dirsearch JSON report")
+
+        return data
+
+    @locked
+    def save(self, file, result):
+        """Append `result` to the "results" list by re-reading and rewriting `file`."""
+        data = self.parse(file)
+        data["results"].append({
+            "url": result.url,
+            "status": result.status,
+            "contentLength": result.length,
+            "contentType": result.type,
+            "redirect": result.redirect,
+        })
+        self.write(file, data)
+
+    def write(self, file, data):
+        """Overwrite `file` with `data` serialized as indented, key-sorted JSON."""
+        with open(file, "w") as fh:
+            json.dump(data, fh, sort_keys=True, indent=4)
diff --git a/lib/report/manager.py b/lib/report/manager.py
new file mode 100755
index 000000000..536413a6b
--- /dev/null
+++ b/lib/report/manager.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+#
+# Author: Mauro Soria
+
+from urllib.parse import urlparse
+
+from lib.core.data import options
+from lib.core.settings import STANDARD_PORTS, START_DATETIME
+from lib.report.csv_report import CSVReport
+from lib.report.html_report import HTMLReport
+from lib.report.json_report import JSONReport
+from lib.report.markdown_report import MarkdownReport
+from lib.report.mysql_report import MySQLReport
+from lib.report.plain_text_report import PlainTextReport
+from lib.report.postgresql_report import PostgreSQLReport
+from lib.report.simple_report import SimpleReport
+from lib.report.sqlite_report import SQLiteReport
+from lib.report.xml_report import XMLReport
+
+
+# Maps each output format name to a (report class, sources) pair. The sources
+# are the values of the `output_file`, `mysql_url`, `postgres_url` and
+# `output_table` options, looked up in `options` when this module is imported.
+# ReportManager skips a format when any of its sources is falsy and otherwise
+# passes the sources, after placeholder substitution, to the reporter.
+output_handlers = {
+ "simple": (SimpleReport, [options["output_file"]]),
+ "plain": (PlainTextReport, [options["output_file"]]),
+ "json": (JSONReport, [options["output_file"]]),
+ "xml": (XMLReport, [options["output_file"]]),
+ "md": (MarkdownReport, [options["output_file"]]),
+ "csv": (CSVReport, [options["output_file"]]),
+ "html": (HTMLReport, [options["output_file"]]),
+ "sqlite": (SQLiteReport, [options["output_file"], options["output_table"]]),
+ "mysql": (MySQLReport, [options["mysql_url"], options["output_table"]]),
+ "postgresql": (PostgreSQLReport, [options["postgres_url"], options["output_table"]]),
+}
+
+
+class ReportManager:
+    """Fan scan results out to one reporter per requested output format."""
+
+    def __init__(self, formats):
+        """Create a reporter for every name in ``formats`` whose output
+        sources are all configured.
+        """
+        self.reports = []
+
+        for name in formats:
+            report_class, sources = output_handlers[name]
+            # No output location provided
+            if not all(sources):
+                continue
+
+            self.reports.append((report_class(), sources))
+
+    def prepare(self, target):
+        """Call ``initiate`` on each reporter with its sources formatted for ``target``."""
+        for reporter, sources in self.reports:
+            reporter.initiate(
+                *(self.format(source, target, reporter) for source in sources)
+            )
+
+    def save(self, result):
+        """Pass ``result`` to each reporter, formatting the sources with ``result.url``."""
+        for reporter, sources in self.reports:
+            reporter.save(
+                *(self.format(source, result.url, reporter) for source in sources),
+                result,
+            )
+
+    def finish(self):
+        """Call ``finish`` on every reporter."""
+        for reporter, _ in self.reports:
+            reporter.finish()
+
+    def format(self, string, target, handler):
+        """Fill the placeholders of ``string`` from ``target`` and ``handler``.
+
+        Available fields: date, host, scheme, port, format, extension.
+        """
+        url = urlparse(target)
+        # Get date from datetime string
+        start_date = START_DATETIME.split()[0]
+
+        return string.format(
+            date=start_date,
+            host=url.hostname,
+            scheme=url.scheme,
+            # Fall back to the scheme's standard port when the URL has none
+            port=url.port or STANDARD_PORTS[url.scheme],
+            format=handler.__format__,
+            extension=handler.__extension__,
+        )
diff --git a/lib/reports/markdown_report.py b/lib/report/markdown_report.py
similarity index 63%
rename from lib/reports/markdown_report.py
rename to lib/report/markdown_report.py
index 7075b886a..e2d5e715a 100755
--- a/lib/reports/markdown_report.py
+++ b/lib/report/markdown_report.py
@@ -16,28 +16,31 @@
#
# Author: Mauro Soria
-import time
-import sys
+from lib.core.decorators import locked
+from lib.core.settings import (
+ COMMAND,
+ NEW_LINE,
+ START_TIME,
+)
+from lib.report.factory import BaseReport, FileReportMixin
-from lib.core.settings import NEW_LINE
-from lib.reports.base import FileBaseReport
+class MarkdownReport(FileReportMixin, BaseReport):
+ """Report that stores results as rows of a Markdown table."""
+ __format__ = "markdown"
+ __extension__ = "md"
-class MarkdownReport(FileBaseReport):
- def get_header(self):
+ def new(self):
+ """Return the report header: command and time lines, then the table heading."""
header = "### Information" + NEW_LINE
- header += f"Command: {chr(32).join(sys.argv)}"
+ header += f"Command: {COMMAND}"
header += NEW_LINE
- header += f"Time: {time.ctime()}"
+ header += f"Time: {START_TIME}"
header += NEW_LINE * 2
header += "URL | Status | Size | Content Type | Redirection" + NEW_LINE
header += "----|--------|------|--------------|------------" + NEW_LINE
return header
- def generate(self, entries):
- output = self.get_header()
-
- for entry in entries:
- output += f"{entry.url} | {entry.status} | {entry.length} | {entry.type} | {entry.redirect}" + NEW_LINE
-
- return output
+ @locked
+ def save(self, file, result):
+ """Append one table row for ``result`` to the content of ``file`` and write it back."""
+ md = self.parse(file)
+ md += f"{result.url} | {result.status} | {result.length} | {result.type} | {result.redirect}" + NEW_LINE
+ self.write(file, md)
diff --git a/lib/reports/mysql_report.py b/lib/report/mysql_report.py
similarity index 76%
rename from lib/reports/mysql_report.py
rename to lib/report/mysql_report.py
index 246b30e85..3f52f9221 100755
--- a/lib/reports/mysql_report.py
+++ b/lib/report/mysql_report.py
@@ -22,22 +22,29 @@
from urllib.parse import urlparse
from lib.core.exceptions import InvalidURLException
-from lib.reports.base import SQLBaseReport
+from lib.report.factory import BaseReport, SQLReportMixin
-class MySQLReport(SQLBaseReport):
- def connect(self, url):
- parsed = urlparse(url)
+class MySQLReport(SQLReportMixin, BaseReport):
+ """SQL report backed by a MySQL connection."""
+ __format__ = "sql"
+ __extension__ = None
+ # NOTE(review): presumably tells the SQL mixin to reuse one connection - confirm
+ _reuse = True
+
+ def is_valid(self, url):
+ """Return True when ``url`` starts with mysql://."""
+ return url.startswith("mysql://")
- if not parsed.scheme == "mysql":
+ def connect(self, url):
+ """Return a MySQL connection built from the parts of ``url``.
+
+ Raises InvalidURLException when ``url`` does not start with mysql://.
+ """
+ if not self.is_valid(url):
raise InvalidURLException("Provided MySQL URL does not start with mysql://")
- self.conn = mysql.connector.connect(
+ parsed = urlparse(url)
+ conn = mysql.connector.connect(
host=parsed.hostname,
port=parsed.port or 3306,
user=parsed.username,
password=parsed.password,
database=parsed.path.lstrip("/"),
)
- self.conn.sql_mode = [SQLMode.ANSI_QUOTES]
- self.cursor = self.conn.cursor()
+ # NOTE(review): presumably needed so double-quoted table names are accepted - confirm
+ conn.sql_mode = [SQLMode.ANSI_QUOTES]
+
+ return conn
diff --git a/lib/reports/plain_text_report.py b/lib/report/plain_text_report.py
similarity index 50%
rename from lib/reports/plain_text_report.py
rename to lib/report/plain_text_report.py
index c078e1808..89105e181 100755
--- a/lib/reports/plain_text_report.py
+++ b/lib/report/plain_text_report.py
@@ -16,28 +16,32 @@
#
# Author: Mauro Soria
-import time
-import sys
+from lib.core.decorators import locked
+from lib.core.settings import (
+ COMMAND,
+ NEW_LINE,
+ START_TIME,
+)
+from lib.report.factory import BaseReport, FileReportMixin
+from lib.utils.common import get_readable_size
-from lib.core.settings import NEW_LINE
-from lib.reports.base import FileBaseReport
-from lib.utils.common import human_size
+class PlainTextReport(FileReportMixin, BaseReport):
+ """Report that stores one text line per result: status, size, URL and optional redirect."""
+ __format__ = "plain"
+ __extension__ = "txt"
-class PlainTextReport(FileBaseReport):
- def get_header(self):
- return f"# Dirsearch started {time.ctime()} as: {chr(32).join(sys.argv)}" + NEW_LINE * 2
+ def new(self):
+ """Return the header line of a new report followed by an empty line."""
+ return f"# Dirsearch started {START_TIME} as: {COMMAND}" + NEW_LINE * 2
- def generate(self, entries):
- output = self.get_header()
+ @locked
+ def save(self, file, result):
+ """Append a line describing ``result`` to the content of ``file`` and write it back."""
+ readable_size = get_readable_size(result.length)
+ data = self.parse(file)
+ # chr(32) is a space: the size is right-aligned in a 6-character column
+ data += f"{result.status} {readable_size.rjust(6, chr(32))} {result.url}"
- for entry in entries:
- readable_size = human_size(entry.length)
- output += f"{entry.status} {readable_size.rjust(6, chr(32))} {entry.url}"
+ if result.redirect:
+ data += f" -> {result.redirect}"
- if entry.redirect:
- output += f" -> REDIRECTS TO: {entry.redirect}"
+ data += NEW_LINE
- output += NEW_LINE
-
- return output
+ self.write(file, data)
diff --git a/lib/reports/postgresql_report.py b/lib/report/postgresql_report.py
similarity index 73%
rename from lib/reports/postgresql_report.py
rename to lib/report/postgresql_report.py
index 68c67962c..bd9fd667b 100755
--- a/lib/reports/postgresql_report.py
+++ b/lib/report/postgresql_report.py
@@ -19,13 +19,19 @@
import psycopg
from lib.core.exceptions import InvalidURLException
-from lib.reports.base import SQLBaseReport
+from lib.report.factory import BaseReport, SQLReportMixin
-class PostgreSQLReport(SQLBaseReport):
+class PostgreSQLReport(SQLReportMixin, BaseReport):
+    """SQL report backed by a PostgreSQL (psycopg) connection."""
+
+    __format__ = "sql"
+    __extension__ = None
+    # NOTE(review): presumably tells the SQL mixin to reuse one connection - confirm
+    _reuse = True
+
+    def is_valid(self, url):
+        """Return True when ``url`` starts with postgres:// or postgresql://."""
+        return url.startswith(("postgres://", "postgresql://"))
+
     def connect(self, url):
+        """Return a psycopg connection to ``url``.
+
+        Raises:
+            InvalidURLException: if ``url`` uses neither accepted scheme.
+        """
-        if not url.startswith("postgresql://"):
+        if not self.is_valid(url):
-            raise InvalidURLException("Provided PostgreSQL URL does not start with postgresql://")
+            # Keep the message in sync with the schemes accepted by is_valid()
+            raise InvalidURLException(
+                "Provided PostgreSQL URL does not start with postgres:// or postgresql://"
+            )
 
-        self.conn = psycopg.connect(url)
-        self.cursor = self.conn.cursor()
+        return psycopg.connect(url)
diff --git a/lib/reports/simple_report.py b/lib/report/simple_report.py
similarity index 67%
rename from lib/reports/simple_report.py
rename to lib/report/simple_report.py
index d0bb00717..7401f7622 100755
--- a/lib/reports/simple_report.py
+++ b/lib/report/simple_report.py
@@ -16,10 +16,20 @@
#
# Author: Mauro Soria
+from lib.core.decorators import locked
from lib.core.settings import NEW_LINE
-from lib.reports.base import FileBaseReport
+from lib.report.factory import BaseReport, FileReportMixin
-class SimpleReport(FileBaseReport):
- def generate(self, entries):
- return NEW_LINE.join(entry.url for entry in entries)
+class SimpleReport(FileReportMixin, BaseReport):
+    """Report that lists one found URL per line."""
+
+    __format__ = "simple"
+    __extension__ = "txt"
+
+    def new(self):
+        """Return the initial content of a report: an empty string."""
+        return ""
+
+    @locked
+    def save(self, file, result):
+        """Append the URL of ``result`` as a new line to the content of ``file``."""
+        content = self.parse(file)
+        line = result.url + NEW_LINE
+        content += line
+        self.write(file, content)
diff --git a/lib/reports/sqlite_report.py b/lib/report/sqlite_report.py
similarity index 54%
rename from lib/reports/sqlite_report.py
rename to lib/report/sqlite_report.py
index 7c9d2c27b..59c311cb4 100755
--- a/lib/reports/sqlite_report.py
+++ b/lib/report/sqlite_report.py
@@ -18,17 +18,18 @@
import sqlite3
-from lib.reports.base import SQLBaseReport
+from lib.report.factory import BaseReport, SQLReportMixin
+from lib.utils.file import FileUtils
-class SQLiteReport(SQLBaseReport):
- def connect(self, output_file):
- self.conn = sqlite3.connect(output_file, check_same_thread=False)
- self.cursor = self.conn.cursor()
+class SQLiteReport(SQLReportMixin, BaseReport):
+ __format__ = "sql"
+ __extension__ = "sqlite"
+ _reuse = False
- def create_table_query(self, table):
+ def get_create_table_query(self, table):
return (f'''CREATE TABLE "{table}" (
- time DATETIME DEFAULT CURRENT_TIMESTAMP,
+ time DATETIME,
url TEXT,
status_code INTEGER,
content_length INTEGER,
@@ -36,7 +37,16 @@ def create_table_query(self, table):
redirect TEXT
);''',)
- def insert_table_query(self, table, values):
- return (f'''INSERT INTO "{table}" (url, status_code, content_length, content_type, redirect)
- VALUES
- (?, ?, ?, ?, ?)''', values)
+    def get_insert_table_query(self, table, values):
+        """Return the (statement, parameters) pair that inserts ``values``
+        as one six-column row into ``table``.
+        """
+        statement = f'INSERT INTO "{table}" VALUES (?, ?, ?, ?, ?, ?);'
+        return (statement, values)
+
+    def connect(self, file):
+        """Open ``file`` as a SQLite database and return the connection.
+
+        FileUtils is asked to create the parent directory of ``file`` first.
+        The connection is opened with ``check_same_thread=False`` so it is not
+        restricted to the thread that created it.
+
+        Raises:
+            Exception: if the integrity check fails with a
+                ``sqlite3.DatabaseError`` (the file is not a SQLite database).
+        """
+        FileUtils.create_dir(FileUtils.parent(file))
+        conn = sqlite3.connect(file, check_same_thread=False)
+        # Check if the file is a proper sqlite database
+        try:
+            conn.cursor().execute("PRAGMA integrity_check")
+        except sqlite3.DatabaseError as error:
+            # Release the handle on the rejected file instead of leaking it
+            conn.close()
+            raise Exception(f"{file} is not empty or is not a SQLite database") from error
+
+        return conn
diff --git a/lib/reports/templates/html_report_template.html b/lib/report/templates/html_report_template.html
similarity index 84%
rename from lib/reports/templates/html_report_template.html
rename to lib/report/templates/html_report_template.html
index 94760f1ff..3241658b2 100644
--- a/lib/reports/templates/html_report_template.html
+++ b/lib/report/templates/html_report_template.html
@@ -39,6 +39,7 @@