diff --git a/README.md b/README.md index e82afcb..36dc3ac 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ OSINT Tool: Generate username lists from companies on LinkedIn. This is a pure web-scraper, no API key required. You use your valid LinkedIn username and password to login, it will create several lists of possible username formats for all employees of a company you point it at. +Login is done with Selenium in a spawned browser window. Maintaining a working CLI login flow was a lot of work, and this resolves many issues while supporting login challenges and 2FA. + Use an account with a lot of connections, otherwise you'll get crappy results. Adding a couple connections at the target company should help - this tool will work up to third degree connections. Note that [LinkedIn will cap search results](https://www.linkedin.com/help/linkedin/answer/129/what-you-get-when-you-search-on-linkedin?lang=en) to 1000 employees max. You can use the features '--geoblast' or '--keywords' to bypass this limit. Look at help below for more details. **WARNING**: LinkedIn has recently (Sept 2020) been hitting li2u users with the monthly commercial search limit. It's a bit mysterious as to when/why this happens. When you hit the limit, you won't be able to search again until the 1st of the month. If you know of a workaround, please let me know. @@ -24,51 +26,45 @@ You'll need to provide the tool with LinkedIn's company name. 
You can find that Here's an example to pull all employees of Uber: ``` -$ python linkedin2username.py myname@email.com uber-com +$ python linkedin2username.py -c uber-com ``` Here's an example to pull a shorter list and append the domain name @uber.com to them: ``` -$ python linkedin2username.py myname@email.com uber-com -d 5 -n 'uber.com' +$ python linkedin2username.py -c uber-com -d 5 -n 'uber.com' ``` # Full Help ``` -usage: linkedin2username.py [-h] [-p PASSWORD] [-n DOMAIN] [-d DEPTH] - [-s SLEEP] - username company +usage: linkedin2username.py [-h] -c COMPANY [-n DOMAIN] [-d DEPTH] + [-s SLEEP] [-x PROXY] [-k KEYWORDS] [-g] [-o OUTPUT] -positional arguments: - username A valid LinkedIn username. - company Company name. +OSINT tool to generate lists of probable usernames from a given company's LinkedIn page. +This tool may break when LinkedIn changes their site. +Please open issues on GitHub to report any inconsistencies. optional arguments: -h, --help show this help message and exit - -p PASSWORD, --password PASSWORD - Specify your password on in clear-text on the command - line. If not specified, will prompt and not display on - screen. + -c COMPANY, --company COMPANY + Company name exactly as typed in the company linkedin profile page URL. -n DOMAIN, --domain DOMAIN - Append a domain name to username output. [example: '-n - uber.com' would ouput jschmoe@uber.com] + Append a domain name to username output. [example: "-n uber.com" would + output jschmoe@uber.com] -d DEPTH, --depth DEPTH - Search depth. If unset, will try to grab them all. + Search depth (how many loops of 25). If unset, will try to grab them + all. -s SLEEP, --sleep SLEEP - Seconds to sleep between pages. defaults to 3. + Seconds to sleep between search loops. Defaults to 0. -x PROXY, --proxy PROXY - HTTPS proxy server to use. Example: "-p - https://localhost:8080" WARNING: WILL DISABLE SSL - VERIFICATION. - + Proxy server to use. WARNING: WILL DISABLE SSL VERIFICATION. 
+ [example: "-x https://localhost:8080"] -k KEYWORDS, --keywords KEYWORDS - Filter results by a a list of command separated - keywords. Will do a separate loop for each keyword, - potentially bypassing the 1,000 record limit. - [example: "-k 'sales,human resources,information - technology'] - -g, --geoblast Attempts to bypass the 1,000 record search limit by - running multiple searches split across geographic - regions. + Filter results by a list of comma-separated keywords. + Will do a separate loop for each keyword, + potentially bypassing the 1,000 record limit. + [example: "-k 'sales,human resources,information technology'"] + -g, --geoblast Attempts to bypass the 1,000 record search limit by running + multiple searches split across geographic regions. -o OUTPUT, --output OUTPUT Output Directory, defaults to li2u-output ``` @@ -76,6 +72,4 @@ optional arguments: # Toubleshooting Sometimes LinkedIn does weird stuff or returns weird results. Sometimes it doesn't like you logging in from new locations. If something looks off, run the tool once or twice more. If it still isn't working, please open an issue. -Multi-factor authentication (MFA, 2FA) is not supported in this tool. - *This is a security research tool. 
Use only where granted explicit permission from the network owner.* diff --git a/linkedin2username.py b/linkedin2username.py index 60db2e0..6537a6e 100755 --- a/linkedin2username.py +++ b/linkedin2username.py @@ -13,12 +13,14 @@ import re import time import argparse -import getpass import json import urllib.parse import requests import urllib3 +from selenium import webdriver +from selenium.common.exceptions import WebDriverException + BANNER = r""" .__ .__________ @@ -192,17 +194,10 @@ def parse_arguments(): ' to report any inconsistencies, and they will be quickly fixed.') parser = argparse.ArgumentParser(description=desc) - parser.add_argument('-u', '--username', type=str, action='store', - required=True, - help='A valid LinkedIn username.') parser.add_argument('-c', '--company', type=str, action='store', required=True, help='Company name exactly as typed in the company ' 'linkedin profile page URL.') - parser.add_argument('-p', '--password', type=str, action='store', - help='Specify your password in clear-text on the ' - 'command line. If not specified, will prompt and ' - 'obfuscate as you type.') parser.add_argument('-n', '--domain', type=str, action='store', default='', help='Append a domain name to username output. ' @@ -251,42 +246,48 @@ def parse_arguments(): print("Sorry, keywords and geoblast are currently not compatible. Use one or the other.") sys.exit() - # If password is not passed in the command line, prompt for it - # in a more secure fashion (not shown on screen) - args.password = args.password or getpass.getpass() - return args -def login(args): - """Creates a new authenticated session. +def get_webdriver(): + """ + Try to get a working Selenium browser driver + """ + for browser in [webdriver.Firefox, webdriver.Chrome]: + try: + return browser() + except WebDriverException: + continue + return None - Note that a mobile user agent is used. 
Parsing using the desktop results - proved extremely difficult, as shared connections would be returned in - a manner that was indistinguishable from the desired targets. - The other header matters as well, otherwise advanced search functions - (region and keyword) will not work. +def login(): + """Creates a new authenticated session. - The function will check for common failure scenarios - the most common is - logging in from a new location. Accounts using multi-factor auth are not - yet supported and will produce an error. + This now uses Selenium because I got very tired playing cat/mouse + with LinkedIn's login process. """ - session = requests.session() + driver = get_webdriver() - # The following are known errors that require the user to log in via the web - login_problems = ['challenge', 'captcha', 'manage-account', 'add-email'] + if driver is None: + print("[!] Could not find a supported browser for Selenium. Exiting.") + sys.exit(1) - # Special options below when using a proxy server. Helpful for debugging - # the application in Burp Suite. - if args.proxy: - print("[!] Using a proxy, ignoring SSL errors. Don't get pwned.") - session.verify = False - urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning) - session.proxies.update(args.proxy_dict) + driver.get("https://linkedin.com/login") + + # Pause until the user lets us know the session is good. + print("[*] Log in to LinkedIn. Leave the browser open and press enter when ready...") + input("Ready? Press Enter!") + + selenium_cookies = driver.get_cookies() + driver.quit() + + # Initialize and return a requests session + session = requests.Session() + for cookie in selenium_cookies: + session.cookies.set(cookie['name'], cookie['value']) - # Our search and regex will work only with a mobile user agent and - # the correct REST protocol specified below. 
+ # Add headers required for this tool to function mobile_agent = ('Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SCH-I535 ' 'Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) ' 'Version/4.0 Mobile Safari/534.30') @@ -294,73 +295,10 @@ def login(args): 'X-RestLi-Protocol-Version': '2.0.0', 'X-Li-Track': '{"clientVersion":"1.13.1665"}'}) - # We wll grab an anonymous response to look for the CSRF token, which - # is required for our logon attempt. - anon_response = session.get('https://www.linkedin.com/login') - login_csrf = re.findall(r'name="loginCsrfParam" value="(.*?)"', - anon_response.text) - if login_csrf: - login_csrf = login_csrf[0] - else: - print("Having trouble loading login page... try the command again.") - sys.exit() - - # Define the data we will POST for our login. - auth_payload = { - 'session_key': args.username, - 'session_password': args.password, - 'isJsEnabled': 'false', - 'loginCsrfParam': login_csrf - } - - # Perform the actual login. We disable redirects as we will use that 302 - # as an indicator of a successful logon. - response = session.post('https://www.linkedin.com/checkpoint/lg/login-submit' - '?loginSubmitSource=GUEST_HOME', - data=auth_payload, allow_redirects=False) - - # Define a successful login by the 302 redirect to the 'feed' page. Try - # to detect some other common logon failures and alert the user. - if response.status_code in (302, 303): - # Add CSRF token for all additional requests - session = set_csrf_token(session) - redirect = response.headers['Location'] - if 'feed' in redirect: - return session - if 'add-phone' in redirect: - # Skip the prompt to add a phone number - url = 'https://www.linkedin.com/checkpoint/post-login/security/dismiss-phone-event' - response = session.post(url) - if response.status_code == 200: - return session - print("[!] Could not skip phone prompt. Log in via the web and then try again.\n") - - elif any(x in redirect for x in login_problems): - print("[!] 
LinkedIn has a message for you that you need to address. " - "Please log in using a web browser first, and then come back and try again.") - else: - # The below will detect some 302 that I don't yet know about. - print("[!] Some unknown redirection occurred. If this persists, please open an issue " - "and include the info below:") - print("DEBUG INFO:") - print(f"LOCATION: {redirect}") - print(f"RESPONSE TEXT:\n{response.text}") - - return False - - # A failed logon doesn't generate a 302 at all, but simply responds with - # the logon page. We detect this here. - if 'LinkedIn Login' in response.text: - print("[!] Check your username and password and try again.\n") - return False + # Set the CSRF token + session = set_csrf_token(session) - # If we make it past everything above, we have no idea what happened. - # Oh well, we fail. - print("[!] Some unknown error logging in. If this persists, please open an issue on github.\n") - print("DEBUG INFO:") - print(f"RESPONSE CODE: {response.status_code}") - print(f"RESPONSE TEXT:\n{response.text}") - return False + return session def set_csrf_token(session): @@ -717,14 +655,20 @@ def main(): args = parse_arguments() # Instantiate a session by logging in to LinkedIn. - session = login(args) + session = login() # If we can't get a valid session, we quit now. Specific errors are # printed to the console inside the login() function. if not session: sys.exit() - print("[*] Successfully logged in.") + # Special options below when using a proxy server. Helpful for debugging + # the application in Burp Suite. + if args.proxy: + print("[!] Using a proxy, ignoring SSL errors. 
Don't get pwned.") + session.verify = False + urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning) + session.proxies.update(args.proxy_dict) # Get basic company info print("[*] Trying to get company info...") diff --git a/requirements.txt b/requirements.txt index f229360..a2ca48c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ requests +selenium