From 629abc4c0ab4fb51d6652364cd0a1fbe2121029f Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 30 Apr 2024 16:09:29 +0300 Subject: [PATCH 01/10] Add code to support %DOMAIN% tag replacer --- lib/core/dictionary.py | 16 ++++++++++++++++ lib/core/settings.py | 2 ++ 2 files changed, 18 insertions(+) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index 5d6f47098..9883de966 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -18,10 +18,13 @@ import re +import urllib.parse + from lib.core.data import options from lib.core.decorators import locked from lib.core.settings import ( SCRIPT_PATH, + DOMAIN_TAG, EXTENSION_TAG, EXCLUDE_OVERWRITE_EXTENSIONS, EXTENSION_RECOGNITION_REGEX, @@ -122,6 +125,19 @@ def generate(self, files=[], is_blacklist=False): if not self.is_valid(line): continue + # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) + if DOMAIN_TAG in line: + for url in options['urls']: + parsed = urllib.parse.urlparse(url) + if parsed is None: + continue + + line = line.replace(DOMAIN_TAG, parsed.hostname) + wordlist.add(newline) + + # At the moment we allow only one TAG per line + continue + # Classic dirsearch wordlist processing (with %EXT% keyword) if EXTENSION_TAG in line.lower(): for extension in options["extensions"]: diff --git a/lib/core/settings.py b/lib/core/settings.py index 05b43dafe..d7d767e4e 100755 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -86,6 +86,8 @@ EXTENSION_TAG = "%ext%" +DOMAIN_TAG = "%DOMAIN%" + EXTENSION_RECOGNITION_REGEX = r"\w+([.][a-zA-Z0-9]{2,5}){1,3}~?$" QUERY_STRING_REGEX = r"^(\&?([^=& ]+)\=([^=& ]+)?){1,200}$" From 0eb955f4a135a4d9b7205888ed78424d1debed1f Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 30 Apr 2024 16:12:08 +0300 Subject: [PATCH 02/10] Better code design is to prepare the list of uniq hostnames --- lib/core/dictionary.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index 9883de966..5d3556dfc 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -114,6 +114,15 @@ def generate(self, files=[], is_blacklist=False): wordlist = OrderedSet() re_ext_tag = re.compile(EXTENSION_TAG, re.IGNORECASE) + # Prepare the list of hostnames from the URLs + hostnames = set() + for url in options['urls']: + parsed = urllib.parse.urlparse(url) + if parsed is None: + continue + + hostnames.add(parsed.hostname) + for dict_file in files: for line in FileUtils.get_lines(dict_file): # Removing leading "/" to work with prefixes later @@ -127,12 +136,8 @@ def generate(self, files=[], is_blacklist=False): # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) if DOMAIN_TAG in line: - for url in options['urls']: - parsed = urllib.parse.urlparse(url) - if parsed is None: - continue - - line = line.replace(DOMAIN_TAG, parsed.hostname) + for hostname in hostnames: + line = line.replace(DOMAIN_TAG, hostname) wordlist.add(newline) # At the moment we allow only one TAG per line From fdfffa844a114549d527670e027a613703564ab5 Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Sun, 12 May 2024 09:25:40 +0300 Subject: [PATCH 03/10] Add breaking of domain names to its components --- lib/core/dictionary.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index 5d3556dfc..e2329db26 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -137,8 +137,23 @@ def generate(self, files=[], is_blacklist=False): # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) if DOMAIN_TAG in line: for hostname in hostnames: - line = line.replace(DOMAIN_TAG, hostname) + split_hostnames = hostname.split(".") + newline = line.replace(DOMAIN_TAG, hostname) wordlist.add(newline) + if len(split_hostnames) > 1: + # We go from 1 dot to .. n .. as we want to return from www.somesite.co.uk: + # www.somesite.co.uk, somesite.co.uk, co.uk + for dots in range(1, len(split_hostnames)): + new_hostname = ".".join(split_hostnames[dots:]) + newline = line.replace(DOMAIN_TAG, new_hostname) + wordlist.add(newline) + + # We go from n dot to .. 1 .. as we want to return from www.somesite.co.uk: + # www.somesite.co, www.somesite, www + for dots in range(1, len(split_hostnames)): + new_hostname = ".".join(split_hostnames[:dots]) + newline = line.replace(DOMAIN_TAG, new_hostname) + wordlist.add(newline) # At the moment we allow only one TAG per line continue From 7b63d8d0ac5f3149714d6247d0b3070224e49d23 Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 14 May 2024 18:03:06 +0300 Subject: [PATCH 04/10] Support multi-tag --- lib/core/dictionary.py | 101 +++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 40 deletions(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index e2329db26..f67690a0d 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -134,65 +134,86 @@ def generate(self, files=[], is_blacklist=False): if not self.is_valid(line): continue - # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) - if DOMAIN_TAG in line: - for hostname in hostnames: - split_hostnames = hostname.split(".") - newline = line.replace(DOMAIN_TAG, hostname) - wordlist.add(newline) - if len(split_hostnames) > 1: - # We go from 1 dot to .. n .. as we want to return from www.somesite.co.uk: - # www.somesite.co.uk, somesite.co.uk, co.uk - for dots in range(1, len(split_hostnames)): - new_hostname = ".".join(split_hostnames[dots:]) - newline = line.replace(DOMAIN_TAG, new_hostname) - wordlist.add(newline) - - # We go from n dot to .. 1 .. as we want to return from www.somesite.co.uk: - # www.somesite.co, www.somesite, www - for dots in range(1, len(split_hostnames)): - new_hostname = ".".join(split_hostnames[:dots]) - newline = line.replace(DOMAIN_TAG, new_hostname) - wordlist.add(newline) - - # At the moment we allow only one TAG per line - continue + new_lines = [line] + extension_tag_triggered = False + # We need this to know that EXTENSION_TAG was used + # and not to trigger the complementary function that + # handles when it is not + if EXTENSION_TAG in line.lower(): + extension_tag_triggered = True + + final_lines = [] + for new_line in new_lines: + # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) + original_line = new_word['line'] + if DOMAIN_TAG in original_line: + for hostname in hostnames: + split_hostnames = hostname.split(".") + new_line = original_line.replace(DOMAIN_TAG, hostname) + final_lines.append(new_line) + + if len(split_hostnames) > 1: + # We go from 1 dot to .. n .. as we want to return from www.somesite.co.uk: + # www.somesite.co.uk, somesite.co.uk, co.uk + for dots in range(1, len(split_hostnames)): + new_hostname = ".".join(split_hostnames[dots:]) + new_line = original_line.replace(DOMAIN_TAG, new_hostname) + final_lines.append(new_line) + + # We go from n dot to .. 1 .. as we want to return from www.somesite.co.uk: + # www.somesite.co, www.somesite, www + for dots in range(1, len(split_hostnames)): + new_hostname = ".".join(split_hostnames[:dots]) + new_line = original_line.replace(DOMAIN_TAG, new_hostname) + final_lines.append(new_line) + + new_lines = final_lines[:] + final_lines = [] # Classic dirsearch wordlist processing (with %EXT% keyword) - if EXTENSION_TAG in line.lower(): - for extension in options["extensions"]: - newline = re_ext_tag.sub(extension, line) - wordlist.add(newline) - else: - wordlist.add(line) + for new_word in new_lines: + if EXTENSION_TAG in new_word.lower(): + for extension in options["extensions"]: + new_line = re_ext_tag.sub(extension, line) + final_lines.append(new_line) + + # Go over the new_words generated + for final_line in final_lines: + wordlist.add(final_line) + + # This keeps original code, that would do an else (i.e if EXTENSION_TAG) + # is not triggered + if extension_tag_triggered: + continue # "Forcing extensions" and "overwriting extensions" shouldn't apply to # blacklists otherwise it might cause false negatives if is_blacklist: continue - # If "forced extensions" is used and the path is not a directory (terminated by /) - # or has had an extension already, append extensions to the path + # If "forced extensions" is used and the path is not a directory + # (terminated by /) or has had an extension already, append + # extensions to the path if ( options["force_extensions"] - and "." not in line - and not line.endswith("/") + and "." not in final_line + and not final_line.endswith("/") ): - wordlist.add(line + "/") + wordlist.add(final_line + "/") for extension in options["extensions"]: - wordlist.add(f"{line}.{extension}") + wordlist.add(f"{final_line}.{extension}") # Overwrite unknown extensions with selected ones (but also keep the origin) elif ( options["overwrite_extensions"] - and not line.endswith(options["extensions"] + EXCLUDE_OVERWRITE_EXTENSIONS) + and not final_line.endswith(options["extensions"] + EXCLUDE_OVERWRITE_EXTENSIONS) # Paths that have queries in wordlist are usually used for exploiting # disclosed vulnerabilities of services, skip such paths - and "?" not in line - and "#" not in line - and re.search(EXTENSION_RECOGNITION_REGEX, line) + and "?" not in final_line + and "#" not in final_line + and re.search(EXTENSION_RECOGNITION_REGEX, final_line) ): - base = line.split(".")[0] + base = final_line.split(".")[0] for extension in options["extensions"]: wordlist.add(f"{base}.{extension}") From abdfed9086ab2a3dc3812f6b7fff6786acc668cc Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 14 May 2024 18:05:42 +0300 Subject: [PATCH 05/10] Forgot to change all occurrences of new_word --- lib/core/dictionary.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index f67690a0d..be85ea8c6 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -145,7 +145,7 @@ def generate(self, files=[], is_blacklist=False): final_lines = [] for new_line in new_lines: # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) - original_line = new_word['line'] + original_line = new_line['line'] if DOMAIN_TAG in original_line: for hostname in hostnames: split_hostnames = hostname.split(".") @@ -171,13 +171,13 @@ def generate(self, files=[], is_blacklist=False): final_lines = [] # Classic dirsearch wordlist processing (with %EXT% keyword) - for new_word in new_lines: - if EXTENSION_TAG in new_word.lower(): + for new_line in new_lines: + if EXTENSION_TAG in new_line.lower(): for extension in options["extensions"]: new_line = re_ext_tag.sub(extension, line) final_lines.append(new_line) - # Go over the new_words generated + # Go over the new_lines generated for final_line in final_lines: wordlist.add(final_line) From 48c910c93c23fc36f9c893307b1afe87630eff46 Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 14 May 2024 18:06:31 +0300 Subject: [PATCH 06/10] 'new_line' ... not the structure --- lib/core/dictionary.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index be85ea8c6..f6cd62061 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -145,11 +145,10 @@ def generate(self, files=[], is_blacklist=False): final_lines = [] for new_line in new_lines: # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) - original_line = new_line['line'] - if DOMAIN_TAG in original_line: + if DOMAIN_TAG in new_line: for hostname in hostnames: split_hostnames = hostname.split(".") - new_line = original_line.replace(DOMAIN_TAG, hostname) + new_line = new_line.replace(DOMAIN_TAG, hostname) final_lines.append(new_line) if len(split_hostnames) > 1: @@ -157,14 +156,14 @@ def generate(self, files=[], is_blacklist=False): # www.somesite.co.uk, somesite.co.uk, co.uk for dots in range(1, len(split_hostnames)): new_hostname = ".".join(split_hostnames[dots:]) - new_line = original_line.replace(DOMAIN_TAG, new_hostname) + new_line = new_line.replace(DOMAIN_TAG, new_hostname) final_lines.append(new_line) # We go from n dot to .. 1 .. as we want to return from www.somesite.co.uk: # www.somesite.co, www.somesite, www for dots in range(1, len(split_hostnames)): new_hostname = ".".join(split_hostnames[:dots]) - new_line = original_line.replace(DOMAIN_TAG, new_hostname) + new_line = new_line.replace(DOMAIN_TAG, new_hostname) final_lines.append(new_line) new_lines = final_lines[:] From 701d8b8a5c603ffa47a2e96deaca72b64395deb9 Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 14 May 2024 18:10:20 +0300 Subject: [PATCH 07/10] Fix coding mistakes found after running it --- lib/core/dictionary.py | 77 ++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index f6cd62061..363244751 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -136,45 +136,58 @@ def generate(self, files=[], is_blacklist=False): new_lines = [line] extension_tag_triggered = False + domain_tag_triggered = False + # We need this to know that EXTENSION_TAG was used # and not to trigger the complementary function that # handles when it is not if EXTENSION_TAG in line.lower(): extension_tag_triggered = True - final_lines = [] - for new_line in new_lines: - # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) - if DOMAIN_TAG in new_line: - for hostname in hostnames: - split_hostnames = hostname.split(".") - new_line = new_line.replace(DOMAIN_TAG, hostname) - final_lines.append(new_line) - - if len(split_hostnames) > 1: - # We go from 1 dot to .. n .. as we want to return from www.somesite.co.uk: - # www.somesite.co.uk, somesite.co.uk, co.uk - for dots in range(1, len(split_hostnames)): - new_hostname = ".".join(split_hostnames[dots:]) - new_line = new_line.replace(DOMAIN_TAG, new_hostname) - final_lines.append(new_line) - - # We go from n dot to .. 1 .. as we want to return from www.somesite.co.uk: - # www.somesite.co, www.somesite, www - for dots in range(1, len(split_hostnames)): - new_hostname = ".".join(split_hostnames[:dots]) - new_line = new_line.replace(DOMAIN_TAG, new_hostname) - final_lines.append(new_line) - - new_lines = final_lines[:] - final_lines = [] + if DOMAIN_TAG in line.lower(): + domain_tag_triggered = True - # Classic dirsearch wordlist processing (with %EXT% keyword) - for new_line in new_lines: - if EXTENSION_TAG in new_line.lower(): - for extension in options["extensions"]: - new_line = re_ext_tag.sub(extension, line) - final_lines.append(new_line) + final_lines = [] + if domain_tag_triggered: + for new_line in new_lines: + # If %DOMAIN% is found, replace it with self.urls (insert as many as they exist) + if DOMAIN_TAG in new_line: + for hostname in hostnames: + split_hostnames = hostname.split(".") + new_line = new_line.replace(DOMAIN_TAG, hostname) + final_lines.append(new_line) + + if len(split_hostnames) > 1: + # We go from 1 dot to .. n .. as we want to return from www.somesite.co.uk: + # www.somesite.co.uk, somesite.co.uk, co.uk + for dots in range(1, len(split_hostnames)): + new_hostname = ".".join(split_hostnames[dots:]) + new_line = new_line.replace(DOMAIN_TAG, new_hostname) + final_lines.append(new_line) + + # We go from n dot to .. 1 .. as we want to return from www.somesite.co.uk: + # www.somesite.co, www.somesite, www + for dots in range(1, len(split_hostnames)): + new_hostname = ".".join(split_hostnames[:dots]) + new_line = new_line.replace(DOMAIN_TAG, new_hostname) + final_lines.append(new_line) + + new_lines = final_lines[:] + + if extension_tag_triggered: + # Remove the items from the list, as we re-generate them here + + final_lines = [] + # Classic dirsearch wordlist processing (with %EXT% keyword) + for new_line in new_lines: + if EXTENSION_TAG in new_line.lower(): + for extension in options["extensions"]: + new_line = re_ext_tag.sub(extension, line) + final_lines.append(new_line) + + # If neither was triggered, just copy new_lines to our final outcome + if not domain_tag_triggered and not extension_tag_triggered: + final_lines = new_lines # Go over the new_lines generated for final_line in final_lines: From 06e58eda4de5addec996089bac241f6503f9b37e Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 14 May 2024 18:12:07 +0300 Subject: [PATCH 08/10] DOMAIN isn't 'lower' --- lib/core/dictionary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index 363244751..a3108ae3c 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -144,7 +144,7 @@ def generate(self, files=[], is_blacklist=False): if EXTENSION_TAG in line.lower(): extension_tag_triggered = True - if DOMAIN_TAG in line.lower(): + if DOMAIN_TAG in line: domain_tag_triggered = True final_lines = [] From a8be50d59a16e7913e6f3784ea6161a89b57fe3a Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 14 May 2024 18:13:51 +0300 Subject: [PATCH 09/10] Modify the copy --- lib/core/dictionary.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index a3108ae3c..81841b0c9 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -154,23 +154,23 @@ def generate(self, files=[], is_blacklist=False): if DOMAIN_TAG in new_line: for hostname in hostnames: split_hostnames = hostname.split(".") - new_line = new_line.replace(DOMAIN_TAG, hostname) - final_lines.append(new_line) + final_line = new_line.replace(DOMAIN_TAG, hostname) + final_lines.append(final_line) if len(split_hostnames) > 1: # We go from 1 dot to .. n .. as we want to return from www.somesite.co.uk: # www.somesite.co.uk, somesite.co.uk, co.uk for dots in range(1, len(split_hostnames)): new_hostname = ".".join(split_hostnames[dots:]) - new_line = new_line.replace(DOMAIN_TAG, new_hostname) - final_lines.append(new_line) + final_line = new_line.replace(DOMAIN_TAG, new_hostname) + final_lines.append(final_line) # We go from n dot to .. 1 .. as we want to return from www.somesite.co.uk: # www.somesite.co, www.somesite, www for dots in range(1, len(split_hostnames)): new_hostname = ".".join(split_hostnames[:dots]) - new_line = new_line.replace(DOMAIN_TAG, new_hostname) - final_lines.append(new_line) + final_line = new_line.replace(DOMAIN_TAG, new_hostname) + final_lines.append(final_line) new_lines = final_lines[:] @@ -182,8 +182,8 @@ def generate(self, files=[], is_blacklist=False): for new_line in new_lines: if EXTENSION_TAG in new_line.lower(): for extension in options["extensions"]: - new_line = re_ext_tag.sub(extension, line) - final_lines.append(new_line) + final_line = re_ext_tag.sub(extension, line) + final_lines.append(final_line) # If neither was triggered, just copy new_lines to our final outcome if not domain_tag_triggered and not extension_tag_triggered: From 08128f779a50ee5df0476cded2bd6751f959797e Mon Sep 17 00:00:00 2001 From: Noam Rathaus Date: Tue, 14 May 2024 18:14:47 +0300 Subject: [PATCH 10/10] Modified the wrong variable --- lib/core/dictionary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index 81841b0c9..1d78998df 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -182,7 +182,7 @@ def generate(self, files=[], is_blacklist=False): for new_line in new_lines: if EXTENSION_TAG in new_line.lower(): for extension in options["extensions"]: - final_line = re_ext_tag.sub(extension, line) + final_line = re_ext_tag.sub(extension, new_line) final_lines.append(final_line) # If neither was triggered, just copy new_lines to our final outcome