diff --git a/Belati.py b/Belati.py
index 1d649df..35f4f77 100644
--- a/Belati.py
+++ b/Belati.py
@@ -31,17 +31,22 @@
 import time
 import dns.resolver
 import tldextract
-from plugins.check_domain import CheckDomain
+
+from plugins.about_project import AboutProject
 from plugins.banner_grab import BannerGrab
-from plugins.logger import Logger
+from plugins.check_domain import CheckDomain
+from plugins.common_service_check import CommonServiceCheck
+from plugins.gather_company import GatherCompany
+from plugins.git_finder import GitFinder
 from plugins.harvest_email import HarvestEmail
 from plugins.harvest_public_document import HarvestPublicDocument
-from plugins.scan_nmap import ScanNmap
-from plugins.wappalyzer import Wappalyzer
-from plugins.git_finder import GitFinder
+from plugins.logger import Logger
 from plugins.robots_scraper import RobotsScraper
-from plugins.about_project import AboutProject
+from plugins.scan_nmap import ScanNmap
+from plugins.svn_finder import SVNFinder
 from plugins.url_request import URLRequest
+from plugins.wappalyzer import Wappalyzer
+
 from lib.Sublist3r import sublist3r
 from lib.CheckMyUsername.check_my_username import CheckMyUsername
 from dnsknife.scanner import Scanner
@@ -66,7 +71,7 @@ def __init__(self):
         parser.add_argument('-d', action='store', dest='domain' , help='Perform OSINT from Domain e.g petruknisme.com(without protocol http/https)')
         parser.add_argument('-u', action='store', dest='username' , help='Perform OSINT from username e.g petruknisme')
         parser.add_argument('-e', action='store', dest='email' , help='Perform OSINT from email address')
-        parser.add_argument('-c', action='store', dest='orgcomp' , help='Perform OSINT from Organization or Company Name')
+        parser.add_argument('-c', action='store', dest='orgcomp' , help='Perform OSINT from Organization or Company Name, use double quotes')
         parser.add_argument('-o', action='store', dest='output_files' , help='Save log for output files')
         parser.add_argument('--db-file', action='store', dest='db_file_location' , help='Specify Database File Location(SQLite3)')
         parser.add_argument('--single-proxy', action='store', dest='single_proxy', help='Proxy support with single IP (ex: http://127.0.0.1:8080)' )
@@ -102,10 +107,10 @@ def __init__(self):
         proxy = self.multiple_proxy_list

         extract_domain = tldextract.extract(domain)
+        self.check_domain(self.url_req.ssl_checker(domain), proxy)
         self.banner_grab(self.url_req.ssl_checker(domain), proxy)
-
         if extract_domain.subdomain == "":
             self.robots_scraper(self.url_req.ssl_checker(domain), proxy)
             self.enumerate_subdomains(domain, proxy)
@@ -125,7 +130,10 @@ def __init__(self):
         if username is not None:
             self.username_checker(username)

-        if email or orgcomp is not None:
+        if orgcomp is not None:
+            self.gather_company(orgcomp, proxy)
+
+        if email is not None:
             log.console_log("This feature will be coming soon. Be patient :)")

         log.console_log("{}All done sir! \nAll log saved in log directory and dowloaded file saved in belatiFiles {}".format(Y, W))
@@ -142,6 +150,7 @@ def show_banner(self):
 | $$$$$$$/| $$$$$$$$| $$$$$$$$| $$  | $$  | $$ /$$$$$$
 |_______/ |________/|________/|__/  |__/  |__/|______/

+        The Traditional Swiss Army Knife for OSINT

         =[ {} {} by {}]=
@@ -188,6 +197,7 @@ def enumerate_subdomains(self, domain_name, proxy):
             self.robots_scraper(self.url_req.ssl_checker(subdomain), proxy)
             self.wappalyzing_webpage(subdomain)
             self.public_git_finder(subdomain, proxy)
+            self.public_svn_finder(subdomain, proxy)
             try:
                 subdomain_ip_list.append(socket.gethostbyname(subdomain))
             except socket.gaierror:
@@ -195,6 +205,10 @@
         subdomain_ip_listFix = list(set(subdomain_ip_list))

+        # check common service port TODO
+        #for ipaddress in subdomain_ip_listFix:
+            #self.common_service_check(ipaddress)
+
         for ipaddress in subdomain_ip_listFix:
             self.service_scanning(ipaddress)
@@ -300,15 +314,38 @@ def public_git_finder(self, domain, proxy_address):
         log.console_log("{}[*] Checking Public GIT Directory on domain {}{}".format(G, domain, W))
         git_finder = GitFinder()
         if git_finder.check_git(domain, proxy_address) == True:
-            log.console_log("{}[+] Gotcha! You are in luck boy!{}".format(G, W))
+            log.console_log("{}[+] Gotcha! You are in luck, boy! [{}/.git/]{}".format(Y, domain, W))
+
+    def public_svn_finder(self, domain, proxy_address):
+        log.console_log("{}[*] Checking Public SVN Directory on domain {}{}".format(G, domain, W))
+        svn_finder = SVNFinder()
+        if svn_finder.check_svn(domain, proxy_address) == 403:
+            log.console_log("{}[+] Um... Forbidden :( {}".format(Y, W))
+        if svn_finder.check_svn(domain, proxy_address) == 200:
+            log.console_log("{}[+] Gotcha! You are in luck, boy! [{}/.svn/]{}".format(Y, domain, W))

     def robots_scraper(self, domain, proxy_address):
         scraper = RobotsScraper()
         data = scraper.check_robots(domain, proxy_address)
-        if data is not None and data.code == 200:
-            log.console_log("{}[+] Found interesting robots.txt content on domain {}:{}".format(Y, domain, W))
+        if data is not None and isinstance(data, int) == False and data.code == 200:
+            log.console_log("{}[+] Found interesting robots.txt [ {} ] =>{}".format(Y, domain, W))
             log.console_log(data.read())

+    def gather_company(self, company_name, proxy_address):
+        log.console_log("{}[+] Gathering Company Employee {} -> {}".format(G, W, company_name))
+        gather_company = GatherCompany()
+        gather_company.crawl_company_employee(company_name, proxy_address)
+
+    def check_update(self, version):
+        log.console_log("{} Checking Version Update for Belati... {}".format(G, W))
+        # TODO
+
+
+    def common_service_check(self, host):
+        log.console_log("{}[*] Checking common services on host {}{}".format(G, host, W))
+        service_check = CommonServiceCheck()
+        service_check.check_available_service(host)
+
     def check_python_version(self):
         if sys.version[:3] == "2.7" or "2" in sys.version[:3]:
             log.console_log("{}[*] Python version OK! \n{}{}".format(G, sys.version[:6], W))
{}{}".format(G, sys.version[:6], W)) diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..0025694 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,11 @@ +Changelog: + +v0.2.0-dev: + +Add Gather Company Employee +Add SVN Finder +Update URL Request +Rework Code +Fix small bug +Update Harvest Public Document Regex +Add version for updater diff --git a/README.md b/README.md index b95cc5b..3e81187 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ # Belati +Belati - The Traditional Swiss Army Knife For OSINT + Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose. This tools is inspired by Foca and Datasploit for OSINT :) ## Why I Made this? @@ -17,7 +19,10 @@ Just for learning stuff and OSINT purpose. Correct me if i'm wrong - Fake and Random User Agent ( Prevent from blocking ) - Proxy Support for Harvesting Emails and Documents - Public Git Finder in domain/subdomain +- Public SVN Finder in domain/subdomain - Robot.txt Scraper in domain/subdomain +- Gather Public Company Employee + ## TODO - Automatic OSINT with Username and Email support @@ -32,6 +37,7 @@ Just for learning stuff and OSINT purpose. Correct me if i'm wrong - Web version with Django - Scanning Report export to PDF - domain or subdomain reputation checker +- Reporting Support to JSON, PDF ## Install/Usage ``` @@ -85,6 +91,7 @@ yum install gcc gmp gmp-devel python-devel - Sublist3r - Subbrute - nmap +- git ## Notice I'm using PyWhois Library, Sublist3r, MailHarvester, Emingoo as part of my code. This tool is for educational purposes only. Any damage you make will not affect the author. Do It With Your Own Risk @@ -92,6 +99,13 @@ I'm using PyWhois Library, Sublist3r, MailHarvester, Emingoo as part of my code. ## Author Aan Wahyu a.k.a Petruknisme(https://petruknisme.com) +## Thanks To + +Thanks to PyWhois Library, Sublist3r, MailHarvester, Emingoo for being part of my code. Also thanks to Hispagatos, Infosec-ninjas, eCHo, RNDC( Research and development center ) and all other people who are inspiring this project :) + +Thanks to Echo-Zine Staff for approving my Ezine : http://ezine.echo.or.id/issue31/005.txt - Belati : Collecting Public Data & Public Document for OSINT Purpose - Petruknisme + + ## License Belati is licensed under GPL V2. You can use, modify, or redistribute this tool under the terms of GNU General Public License (GPLv2). diff --git a/plugins/about_project.py b/plugins/about_project.py index b919833..e7228c4 100644 --- a/plugins/about_project.py +++ b/plugins/about_project.py @@ -24,7 +24,7 @@ class AboutProject(object): def __init__(self): self.__info__ = 'Collecting Public Data & Public Document for OSINT purpose' self.__author__ = 'Petruknisme' - self.__version__ = 'v0.1.8-dev' + self.__version__ = 'v0.2.0-dev' self.__name__= "Belati" self.__giturl__ = "https://github.com/aancw/Belati" self.__authorurl__ = "https://petruknisme.com" diff --git a/plugins/common_service_check.py b/plugins/common_service_check.py new file mode 100644 index 0000000..e5c9ca9 --- /dev/null +++ b/plugins/common_service_check.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose. 
+# This tools is inspired by Foca and Datasploit for OSINT
+# Copyright (C) 2017 cacaddv@gmail.com (Petruknisme a.k.a Aan Wahyu)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This file is part of Belati project
+
+import sys, socket, errno
+from logger import Logger
+from url_request import URLRequest
+
+
+# Console color
+G = '\033[92m' # green
+Y = '\033[93m' # yellow
+B = '\033[94m' # blue
+R = '\033[91m' # red
+W = '\033[0m' # white
+
+url_req = URLRequest()
+log = Logger()
+
+class CommonServiceCheck(object):
+    ## STILL NOT ACCURATE!
+    def check_available_service(self, host):
+        list_available_port = []
+        list_common_port = [21,22,23,25,53,80,110,111,135,139,143,443,445,993,995,1723,3306,3389,5900,8080]
+        for port in list_common_port:
+            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            try:
+                s.connect((host, port))
+                if port == 80:
+                    data = url_req.header_info("http://" + host, "")
+                    log.console_log("Found HTTP Service : ({} OPEN)".format(str(port)) )
+                    log.console_log("\n{}".format(data))
+                elif port == 443:
+                    data = url_req.header_info("https://" + host, "")
+                else:
+                    print("port :" + str(port) + " OPEN! " + s.recv(4096))
+            except socket.error as e:
+                if e.errno == errno.ECONNREFUSED or e.errno == 113:
+                    pass
+                else:
+                    print("port :" + str(port) + str(e) + " closed")
+            s.close()
diff --git a/plugins/gather_company.py b/plugins/gather_company.py
new file mode 100644
index 0000000..0d4109d
--- /dev/null
+++ b/plugins/gather_company.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose.
+# This tools is inspired by Foca and Datasploit for OSINT
+# Copyright (C) 2017 cacaddv@gmail.com (Petruknisme a.k.a Aan Wahyu)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This file is part of Belati project
+
+import re,sys
+from bs4 import BeautifulSoup
+from logger import Logger
+from url_request import URLRequest
+
+# Console color
+G = '\033[92m' # green
+Y = '\033[93m' # yellow
+B = '\033[94m' # blue
+R = '\033[91m' # red
+W = '\033[0m' # white
+
+url_req = URLRequest()
+log = Logger()
+
+class GatherCompany(object):
+    def crawl_company_employee(self, company_name, proxy_address):
+        comp_strip = company_name.replace(" ", "+")
+        url = 'https://www.google.com/search?q={}+site:linkedin.com&num=200'.format(comp_strip)
+
+        data = url_req.standart_request(url, proxy_address)
+
+        soup = BeautifulSoup( data, 'html.parser' )
+        company_linkedin_url_list = []
+
+        # Getting all h3 tags with class 'r'
+        scrap_container = soup.find_all('div', class_='rc')
+        for rc in scrap_container:
+            soup2 = BeautifulSoup( str(rc), 'html.parser' )
+            url = soup2.find_all('h3', class_= 'r')
+            url_fix = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', str(url))
+            linkedin_url = re.findall(r'(http[s]?://.*\.linkedin\.com/in/.*)', str(url_fix).strip("\'[]")) # filter only *.linkedin.com/in
+            company_linkedin_url = re.findall(r'(http[s]?://.*\.linkedin\.com/company/.*)', str(url_fix).strip("\'[]")) # filter only *.linkedin.com/company
+            job_title = soup2.find_all('div', class_='slp f')
+
+            if company_linkedin_url:
+                company_linkedin_url_list.append(company_linkedin_url)
+
+            # Get data when linkedin url is like this : *.linkedin.com/in
+            if not linkedin_url:
+                pass
+            else:
+                name_fix = re.sub('<[^<]+?>', '', str(rc.h3.a)) # strip all html tags
+                job_title_fix = re.sub('<[^<]+?>', '', str(job_title)) # strip all html tags
+                log.console_log("{}[+] --------------------------------------------------- [+]{}".format(Y, W))
+                log.console_log("Name: {}".format( name_fix.replace('| LinkedIn', '') ))
+                log.console_log("Job Title: {}".format( str(job_title_fix.replace('\u200e', ' ')).strip("\'[]") ))
+                log.console_log("Url: {}".format( str(linkedin_url).strip("\'[]") ))
+                log.console_log("{}[+] --------------------------------------------------- [+]{}\n".format(Y, W))
+
+        log.console_log("\n\n{}[+] --------------------------------------------------- [+]{}".format(Y, W))
+        log.console_log("{}[+] Found LinkedIn Company URL: {}".format(Y, W))
+        for url in company_linkedin_url_list:
+            log.console_log("{} {} {}".format(Y, str(url), W))
diff --git a/plugins/harvest_public_document.py b/plugins/harvest_public_document.py
index f568427..6276fca 100644
--- a/plugins/harvest_public_document.py
+++ b/plugins/harvest_public_document.py
@@ -20,7 +20,7 @@

 # This file is part of Belati project

-import re, os
+import re, os, errno
 import urllib
 from logger import Logger
 from tqdm import tqdm
@@ -54,7 +54,10 @@ def harvest_public_doc(self, domain, extension, proxy_address):
         total_files = 0
         url = 'https://www.google.com/search?q=site:' + domain + '%20ext:' + extension + '&filter=0&num=200'
         data = data = url_req.standart_request(url, proxy_address)
-        regex = "(?P<url>https?://[^:]+\.%s)" % extension
+        # Regex: https?:\/\/[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+.pdf
+        # (?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.pdf)
+        # "(?P<url>https?://[^:]+\.%s)" % extension
+        regex = "(?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.{})".format(extension)
         data = re.findall(regex, data)
         list_files_download = list(set(data))
         total_files = str(len(list_files_download))
diff --git a/plugins/logger.py b/plugins/logger.py
index 150174d..c4bc123 100644
--- a/plugins/logger.py
+++ b/plugins/logger.py
@@ -20,7 +20,7 @@

 # This file is part of Belati project

-import sys, os
+import sys, os, errno
 import logging
 import time
diff --git a/plugins/robots_scraper.py b/plugins/robots_scraper.py
index 1e74e8a..14a6e2e 100644
--- a/plugins/robots_scraper.py
+++ b/plugins/robots_scraper.py
@@ -30,6 +30,7 @@ def check_robots(self, domain_name, proxy_address):
             url_request = "{}/robots.txt".format(domain_name, proxy_address)
             data = url_req.just_url_open(url_request, proxy_address)
             if data is not "" and data is not "notexist":
-                return data
+                if data.getcode() == 200 and data.getcode() != 302:
+                    return data
         except:
             pass
diff --git a/plugins/svn_finder.py b/plugins/svn_finder.py
new file mode 100644
index 0000000..4e29fd8
--- /dev/null
+++ b/plugins/svn_finder.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Belati is tool for Collecting Public Data & Public Document from Website and other service for OSINT purpose.
+# This tools is inspired by Foca and Datasploit for OSINT
+# Copyright (C) 2017 cacaddv@gmail.com (Petruknisme a.k.a Aan Wahyu)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This file is part of Belati project
+
+import sys, re, time
+from url_request import URLRequest
+
+# Console color
+G = '\033[92m' # green
+Y = '\033[93m' # yellow
+B = '\033[94m' # blue
+R = '\033[91m' # red
+W = '\033[0m' # white
+
+url_req = URLRequest()
+
+class SVNFinder(object):
+    def check_svn(self, domain, proxy_address):
+        try:
+            data = url_req.just_url_open(url_req.ssl_checker(domain) + "/.svn/", proxy_address)
+
+            if data is not None and data is not "notexist":
+                if data == 403:
+                    return data
+                if data.getcode() == 200 and data.getcode() != 302:
+                    return data.getcode()
+        except:
+            pass
diff --git a/plugins/url_request.py b/plugins/url_request.py
index 1325f5e..9392b6d 100644
--- a/plugins/url_request.py
+++ b/plugins/url_request.py
@@ -119,8 +119,10 @@ def just_url_open(self, url_request, proxy_address):
             opener.addheaders = [('User-agent', ua.get_user_agent() )]
             urllib2.install_opener(opener)
             req = urllib2.Request(url_request)
-            data = urllib2.urlopen(req)
+            data = urllib2.urlopen(req, timeout=25)
             return data
+        except urllib2.HTTPError, e:
+            return e.code
         except urllib2.URLError, e:
             if str(e.reason) == "[Errno -2] Name or service not known":
                 log.console_log("Not EXIST!")
@@ -135,7 +137,7 @@ def ssl_checker(self, domain):
             # Skip SSL Verification Check!
             # https://stackoverflow.com/questions/27835619/ssl-certificate-verify-failed-error
             gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1) # Only for gangstars
-            data = urllib2.urlopen("https://{}".format(domain), timeout=15, context=gcontext)
+            data = urllib2.urlopen("https://{}".format(domain), timeout=25, context=gcontext)
             if not "ERROR" in data:
                 use_ssl = True
         except urllib2.HTTPError, e:
diff --git a/requirements.txt b/requirements.txt
index e6b42e7..5ca62d1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,4 @@ tldextract
 fake-useragent
 python-wappalyzer
 future
+beautifulsoup4
diff --git a/version b/version
new file mode 100644
index 0000000..3ea3eab
--- /dev/null
+++ b/version
@@ -0,0 +1 @@
+v0.2.0-dev
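
For quick reference, here is a minimal standalone sketch of the new GatherCompany plugin introduced by this patch, assuming the patch is applied, Python 2, and the repository root as the working directory; "Example Corp" and the empty proxy string are placeholder values, not project defaults:

```python
# Sketch only: mirrors the new gather_company() wrapper in Belati.py.
# crawl_company_employee() queries Google for '<company>+site:linkedin.com' and
# prints Name / Job Title / Url for every LinkedIn profile hit it can parse.
from plugins.gather_company import GatherCompany

gather = GatherCompany()
gather.crawl_company_employee("Example Corp", "")  # "" = no proxy (assumed convention)
```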
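
Likewise, a hedged sketch of the new SVNFinder plugin: check_svn() resolves to an HTTP status code, 403 when /.svn/ exists but is forbidden and 200 when it is publicly readable, the same values the public_svn_finder() handler in Belati.py switches on; example.com is a placeholder domain:

```python
# Sketch only: standalone use of SVNFinder, matching public_svn_finder() in Belati.py.
from plugins.svn_finder import SVNFinder

svn = SVNFinder()
status = svn.check_svn("example.com", "")  # bare domain, no scheme; "" = no proxy (assumed)
if status == 200:
    print("[+] Public .svn directory exposed")
elif status == 403:
    print("[+] .svn present but forbidden")
```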