
Add Gather Company Employee
Add SVN Finder
Update URL Request
Rework Code
Fix small bug
Update Harvest Public Document Regex
Add version for updater
aancw committed May 2, 2017
1 parent 1d32eb1 commit 2938987
Showing 13 changed files with 271 additions and 19 deletions.
61 changes: 49 additions & 12 deletions Belati.py
@@ -31,17 +31,22 @@
import time
import dns.resolver
import tldextract
from plugins.check_domain import CheckDomain

from plugins.about_project import AboutProject
from plugins.banner_grab import BannerGrab
from plugins.logger import Logger
from plugins.check_domain import CheckDomain
from plugins.common_service_check import CommonServiceCheck
from plugins.gather_company import GatherCompany
from plugins.git_finder import GitFinder
from plugins.harvest_email import HarvestEmail
from plugins.harvest_public_document import HarvestPublicDocument
from plugins.scan_nmap import ScanNmap
from plugins.wappalyzer import Wappalyzer
from plugins.git_finder import GitFinder
from plugins.logger import Logger
from plugins.robots_scraper import RobotsScraper
from plugins.about_project import AboutProject
from plugins.scan_nmap import ScanNmap
from plugins.svn_finder import SVNFinder
from plugins.url_request import URLRequest
from plugins.wappalyzer import Wappalyzer

from lib.Sublist3r import sublist3r
from lib.CheckMyUsername.check_my_username import CheckMyUsername
from dnsknife.scanner import Scanner
@@ -66,7 +71,7 @@ def __init__(self):
parser.add_argument('-d', action='store', dest='domain' , help='Perform OSINT from Domain e.g petruknisme.com(without protocol http/https)')
parser.add_argument('-u', action='store', dest='username' , help='Perform OSINT from username e.g petruknisme')
parser.add_argument('-e', action='store', dest='email' , help='Perform OSINT from email address')
parser.add_argument('-c', action='store', dest='orgcomp' , help='Perform OSINT from Organization or Company Name')
parser.add_argument('-c', action='store', dest='orgcomp' , help='Perform OSINT from Organization or Company Name (use double quotes for names with spaces)')
parser.add_argument('-o', action='store', dest='output_files' , help='Save log for output files')
parser.add_argument('--db-file', action='store', dest='db_file_location' , help='Specify Database File Location(SQLite3)')
parser.add_argument('--single-proxy', action='store', dest='single_proxy', help='Proxy support with single IP (ex: http://127.0.0.1:8080)' )
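For reference, the new -c option would be invoked roughly like this; the company name is a placeholder, and the proxy value is simply the example already given in the --single-proxy help text:

```
python Belati.py -c "Example Corp" --single-proxy http://127.0.0.1:8080
```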
@@ -102,10 +107,10 @@ def __init__(self):
proxy = self.multiple_proxy_list

extract_domain = tldextract.extract(domain)

self.check_domain(self.url_req.ssl_checker(domain), proxy)
self.banner_grab(self.url_req.ssl_checker(domain), proxy)


if extract_domain.subdomain == "":
self.robots_scraper(self.url_req.ssl_checker(domain), proxy)
self.enumerate_subdomains(domain, proxy)
@@ -125,7 +130,10 @@ def __init__(self):
if username is not None:
self.username_checker(username)

if email or orgcomp is not None:
if orgcomp is not None:
self.gather_company(orgcomp, proxy)

if email is not None:
log.console_log("This feature will be coming soon. Be patient :)")

log.console_log("{}All done sir! All log saved in log directory and downloaded file saved in belatiFiles {}".format(Y, W))
@@ -142,6 +150,7 @@ def show_banner(self):
| $$$$$$$/| $$$$$$$$| $$$$$$$$| $$ | $$ | $$ /$$$$$$
|_______/ |________/|________/|__/ |__/ |__/ |______/
The Traditional Swiss Army Knife for OSINT
=[ {} {} by {}]=
@@ -188,13 +197,18 @@ def enumerate_subdomains(self, domain_name, proxy):
self.robots_scraper(self.url_req.ssl_checker(subdomain), proxy)
self.wappalyzing_webpage(subdomain)
self.public_git_finder(subdomain, proxy)
self.public_svn_finder(subdomain, proxy)
try:
subdomain_ip_list.append(socket.gethostbyname(subdomain))
except socket.gaierror:
pass

subdomain_ip_listFix = list(set(subdomain_ip_list))

# check common service port TODO
#for ipaddress in subdomain_ip_listFix:
#self.common_service_check(ipaddress)

for ipaddress in subdomain_ip_listFix:
self.service_scanning(ipaddress)

@@ -300,15 +314,38 @@ def public_git_finder(self, domain, proxy_address):
log.console_log("{}[*] Checking Public GIT Directory on domain {}{}".format(G, domain, W))
git_finder = GitFinder()
if git_finder.check_git(domain, proxy_address) == True:
log.console_log("{}[+] Gotcha! You are in luck boy!{}".format(G, W))
log.console_log("{}[+] Gotcha! You are in luck, boy![{}/.git/]{}".format(Y, domain, W))

def public_svn_finder(self, domain, proxy_address):
log.console_log("{}[*] Checking Public SVN Directory on domain {}{}".format(G, domain, W))
svn_finder = SVNFinder()
if svn_finder.check_svn(domain, proxy_address) == 403:
log.console_log("{}[+] Um... Forbidden :( {}".format(Y, W))
if svn_finder.check_svn(domain, proxy_address) == 200:
log.console_log("{}[+] Gotcha! You are in luck, boy![{}/.svn/]{}".format(Y, domain, W))
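plugins/svn_finder.py itself is not shown in this diff; judging only from how public_svn_finder() consumes it above, check_svn() returns an HTTP status code. A minimal sketch under that assumption (the probed path, the urllib2 transport, and the error handling are guesses rather than the actual plugin code, and `domain` is assumed to be a bare hostname as passed from enumerate_subdomains()):

```python
# Hypothetical sketch of plugins/svn_finder.py (Python 2) -- only the 200/403
# return contract is taken from public_svn_finder() above; the rest is assumed.
import urllib2


class SVNFinder(object):
    def check_svn(self, domain, proxy_address):
        url = "http://{}/.svn/entries".format(domain)
        handlers = []
        if proxy_address:
            handlers.append(urllib2.ProxyHandler({"http": proxy_address,
                                                  "https": proxy_address}))
        opener = urllib2.build_opener(*handlers)
        try:
            return opener.open(url, timeout=10).getcode()  # 200 -> exposed .svn
        except urllib2.HTTPError as e:
            return e.code                                  # e.g. 403 Forbidden
        except Exception:
            return 0                                       # unreachable / DNS error
```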

def robots_scraper(self, domain, proxy_address):
scraper = RobotsScraper()
data = scraper.check_robots(domain, proxy_address)
if data is not None and data.code == 200:
log.console_log("{}[+] Found interesting robots.txt content on domain {}:{}".format(Y, domain, W))
if data is not None and isinstance(data, int) == False and data.code == 200:
log.console_log("{}[+] Found interesting robots.txt[ {} ] =>{}".format(Y, domain, W))
log.console_log(data.read())

def gather_company(self, company_name, proxy_address):
log.console_log("{}[+] Gathering Company Employee {} -> {}".format(G, W, company_name))
gather_company = GatherCompany()
gather_company.crawl_company_employee(company_name, proxy_address)

def check_update(self, version):
log.console_log("{} Checking Version Update for Belati... {}".format(G, W))
# TODO
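check_update() is still a stub here, but this commit does bump __version__ in plugins/about_project.py to v0.2.0-dev, presumably so a future updater has something to compare against. Purely as an illustration of one way that comparison could work — this is not actual Belati code; the tags endpoint is GitHub's standard public API for the aancw/Belati repository:

```python
# Illustrative sketch only -- check_update() is a TODO in this commit.
import json
import urllib2

from plugins.about_project import AboutProject


def check_update():
    local_version = AboutProject().__version__  # "v0.2.0-dev" after this commit
    tags_url = "https://api.github.com/repos/aancw/Belati/tags"
    try:
        tags = json.load(urllib2.urlopen(tags_url, timeout=10))
        latest = tags[0]["name"] if tags else local_version
        if latest != local_version:
            print("[!] Newer release available: {} (running {})".format(latest, local_version))
        else:
            print("[*] Belati is up to date ({})".format(local_version))
    except Exception as err:
        print("[!] Version check failed: {}".format(err))
```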


def common_service_check(self, host):
log.console_log("{}[*] Checking common services on host {}{}".format(G, host, W))
service_check = CommonServiceCheck()
service_check.check_available_service(host)

def check_python_version(self):
if sys.version[:3] == "2.7" or "2" in sys.version[:3]:
log.console_log("{}[*] Python version OK! {}{}".format(G, sys.version[:6], W))
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,11 @@
Changelog:

v0.2.0-dev:

Add Gather Company Employee
Add SVN Finder
Update URL Request
Rework Code
Fix small bug
Update Harvest Public Document Regex
Add version for updater
14 changes: 14 additions & 0 deletions README.md
@@ -1,4 +1,6 @@
# Belati
Belati - The Traditional Swiss Army Knife For OSINT

Belati is a tool for collecting public data and public documents from websites and other services for OSINT purposes. This tool is inspired by Foca and Datasploit :)

## Why I Made this?
@@ -17,7 +19,10 @@ Just for learning stuff and OSINT purpose. Correct me if i'm wrong
- Fake and Random User Agent (prevents blocking)
- Proxy Support for Harvesting Emails and Documents
- Public Git Finder in domain/subdomain
- Public SVN Finder in domain/subdomain
- Robots.txt Scraper in domain/subdomain
- Gather Public Company Employee


## TODO
- Automatic OSINT with Username and Email support
@@ -32,6 +37,7 @@ Just for learning stuff and OSINT purpose. Correct me if i'm wrong
- Web version with Django
- Scanning Report export to PDF
- domain or subdomain reputation checker
- Reporting support for JSON and PDF

## Install/Usage
```
@@ -85,13 +91,21 @@ yum install gcc gmp gmp-devel python-devel
- Sublist3r
- Subbrute
- nmap
- git

## Notice
I'm using the PyWhois library, Sublist3r, MailHarvester, and Emingoo as part of my code. This tool is for educational purposes only. The author is not responsible for any damage you cause. Use it at your own risk.

## Author
Aan Wahyu a.k.a. Petruknisme (https://petruknisme.com)

## Thanks To

Thanks to the PyWhois library, Sublist3r, MailHarvester, and Emingoo for being part of my code. Also thanks to Hispagatos, Infosec-ninjas, eCHo, RNDC (Research and Development Center), and everyone else who has inspired this project :)

Thanks to the Echo-Zine staff for approving my ezine: http://ezine.echo.or.id/issue31/005.txt - Belati : Collecting Public Data & Public Document for OSINT Purpose - Petruknisme


## License
Belati is licensed under GPLv2. You can use, modify, or redistribute this tool under the terms of the GNU General Public License, version 2.

2 changes: 1 addition & 1 deletion plugins/about_project.py
@@ -24,7 +24,7 @@ class AboutProject(object):
def __init__(self):
self.__info__ = 'Collecting Public Data & Public Document for OSINT purpose'
self.__author__ = 'Petruknisme'
self.__version__ = 'v0.1.8-dev'
self.__version__ = 'v0.2.0-dev'
self.__name__= "Belati"
self.__giturl__ = "https://github.com/aancw/Belati"
self.__authorurl__ = "https://petruknisme.com"
60 changes: 60 additions & 0 deletions plugins/common_service_check.py
@@ -0,0 +1,60 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Belati is a tool for collecting public data and public documents from websites and other services for OSINT purposes.
# This tool is inspired by Foca and Datasploit for OSINT
# Copyright (C) 2017 [email protected] (Petruknisme a.k.a Aan Wahyu)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# This file is part of Belati project

import sys, socket, errno
from logger import Logger
from url_request import URLRequest


# Console color
G = '\033[92m' # green
Y = '\033[93m' # yellow
B = '\033[94m' # blue
R = '\033[91m' # red
W = '\033[0m' # white

url_req = URLRequest()
log = Logger()

class CommonServiceCheck(object):
## STILL NOT ACCURATE!
def check_available_service(self, host):
list_available_port = []
list_common_port = [21,22,23,25,53,80,110,111,135,139,143,443,445,993,995,1723,3306,3389,5900,8080]
for port in list_common_port:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
s.connect((host, port))
if port == 80:
data = url_req.header_info("http://" + host, "")
log.console_log("Found HTTP Service : ({} OPEN)".format(str(port)) )
log.console_log("\n{}".format(data))
elif port == 443:
data = url_req.header_info("https://" + host, "")
else:
print("port :" + str(port) + " OPEN! " + s.recv(4096))
except socket.error as e:
if e.errno == errno.ECONNREFUSED or e.errno == 113:
pass
else:
print("port :" + str(port) + str(e) + "closed")
s.close()
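The class is explicitly flagged STILL NOT ACCURATE and its call site in Belati.py remains commented out. One likely contributor is that the sockets above use no timeout, so a single filtered port can stall the whole loop, and recv() can block on services that never send a banner. A small sketch of the same idea with a per-connection timeout, offered only as an illustration rather than as a fix that ships in this commit:

```python
# Sketch only: same common-port idea as CommonServiceCheck, but each connection
# attempt is bounded by a timeout so filtered ports fail fast.
import socket


def quick_port_check(host, ports=(21, 22, 23, 25, 53, 80, 110, 143, 443,
                                  445, 993, 995, 3306, 3389, 8080), timeout=3.0):
    open_ports = []
    for port in ports:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(timeout)                    # never block longer than `timeout` seconds
        try:
            if s.connect_ex((host, port)) == 0:  # 0 means the TCP handshake succeeded
                open_ports.append(port)
        finally:
            s.close()
    return open_ports
```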
76 changes: 76 additions & 0 deletions plugins/gather_company.py
@@ -0,0 +1,76 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Belati is a tool for collecting public data and public documents from websites and other services for OSINT purposes.
# This tool is inspired by Foca and Datasploit for OSINT
# Copyright (C) 2017 [email protected] (Petruknisme a.k.a Aan Wahyu)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# This file is part of Belati project

import re,sys
from bs4 import BeautifulSoup
from logger import Logger
from url_request import URLRequest

# Console color
G = '\033[92m' # green
Y = '\033[93m' # yellow
B = '\033[94m' # blue
R = '\033[91m' # red
W = '\033[0m' # white

url_req = URLRequest()
log = Logger()

class GatherCompany(object):
def crawl_company_employee(self, company_name, proxy_address):
comp_strip = company_name.replace(" ", "+")
url = 'https://www.google.com/search?q={}+site:linkedin.com&num=200'.format(comp_strip)

data = url_req.standart_request(url, proxy_address)

soup = BeautifulSoup( data, 'html.parser' )
company_linkedin_url_list = []

# Each Google result sits in a div with class 'rc'; the h3 tag with class 'r' inside holds the result link
scrap_container = soup.find_all('div', class_='rc')
for rc in scrap_container:
soup2 = BeautifulSoup( str(rc), 'html.parser' )
url = soup2.find_all('h3', class_= 'r')
url_fix = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', str(url))
linkedin_url = re.findall(r'(http[s]?://.*\.linkedin\.com/in/.*)', str(url_fix).strip("\'[]")) # keep only *.linkedin.com/in profile URLs
company_linkedin_url = re.findall(r'(http[s]?://.*\.linkedin\.com/company/.*)', str(url_fix).strip("\'[]")) # keep only *.linkedin.com/company pages
job_title = soup2.find_all('div', class_='slp f')

if company_linkedin_url:
company_linkedin_url_list.append(company_linkedin_url)

# Only print details when the result is a profile URL (*.linkedin.com/in/...)
if not linkedin_url:
pass
else:
name_fix = re.sub('<[^<]+?>', '', str(rc.h3.a)) # strip all html tags like <em>
job_title_fix = re.sub('<[^<]+?>', '', str(job_title)) # strip all html tags like <em>
log.console_log("{}[+] --------------------------------------------------- [+]{}".format(Y, W))
log.console_log("Name: {}".format( name_fix.replace('| LinkedIn', '') ))
log.console_log("Job Title: {}".format( str(job_title_fix.replace('\u200e', ' ')).strip("\'[]") ))
log.console_log("Url: {}".format( str(linkedin_url).strip("\'[]") ))
log.console_log("{}[+] --------------------------------------------------- [+]{}\n".format(Y, W))

log.console_log("\n\n{}[+] --------------------------------------------------- [+]{}".format(Y, W))
log.console_log("{}[+] Found LinkedIn Company URL: {}".format(Y, W))
for url in company_linkedin_url_list:
log.console_log("{} {} {}".format(Y, str(url), W))
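A minimal usage sketch of the new plugin, mirroring how gather_company() in Belati.py drives it when the tool is run with -c; the company name is a placeholder, and the empty proxy string is an assumption about the no-proxy default:

```python
# Run from the repository root; the values below are placeholders for illustration.
from plugins.gather_company import GatherCompany

gather_company = GatherCompany()
gather_company.crawl_company_employee("Example Corp", "")  # "" assumed to mean no proxy
```

Note that the crawl depends on the markup Google serves for result pages (the div.rc, h3.r, and div.slp selectors above), so output can silently drop to nothing if that markup changes.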
7 changes: 5 additions & 2 deletions plugins/harvest_public_document.py
@@ -20,7 +20,7 @@

# This file is part of Belati project

import re, os
import re, os, errno
import urllib
from logger import Logger
from tqdm import tqdm
@@ -54,7 +54,10 @@ def harvest_public_doc(self, domain, extension, proxy_address):
total_files = 0
url = 'https://www.google.com/search?q=site:' + domain + '%20ext:' + extension + '&filter=0&num=200'
data = data = url_req.standart_request(url, proxy_address)
regex = "(?P<url>https?://[^:]+\.%s)" % extension
# Re<url>https?:\/\/[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+.pdf
# (?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.pdf)
# "(?P<url>https?://[^:]+\.%s)" % extension
regex = "(?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.{})".format(extension)
data = re.findall(regex, data)
list_files_download = list(set(data))
total_files = str(len(list_files_download))
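To see what the tightened pattern changes, compare both expressions on an invented snippet of search-result text: the old [^:]+ class runs across spaces and trailing prose, while the new character class stops at the first character that cannot appear in a URL.

```python
# Invented sample text, only to contrast the old and new document-URL patterns.
import re

extension = "pdf"
data = 'See https://example.com/files/report.pdf?session=1 and also slides.pdf for details'

old_regex = "(?P<url>https?://[^:]+\.%s)" % extension
new_regex = "(?P<url>https?://[A-Za-z0-9\-\?&#_~@=\.\/%\[\]\+]+\.{})".format(extension)

print(re.findall(old_regex, data))
# ['https://example.com/files/report.pdf?session=1 and also slides.pdf']
print(re.findall(new_regex, data))
# ['https://example.com/files/report.pdf']
```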
2 changes: 1 addition & 1 deletion plugins/logger.py
@@ -20,7 +20,7 @@

# This file is part of Belati project

import sys, os
import sys, os, errno
import logging
import time

3 changes: 2 additions & 1 deletion plugins/robots_scraper.py
@@ -30,6 +30,7 @@ def check_robots(self, domain_name, proxy_address):
url_request = "{}/robots.txt".format(domain_name, proxy_address)
data = url_req.just_url_open(url_request, proxy_address)
if data is not "" and data is not "notexist":
return data
if data.getcode() == 200 and data.getcode() != 302:
return data
except:
pass