diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6769e21
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
\ No newline at end of file
diff --git a/LinkedInt.py b/LinkedInt.py
index ca2e327..49bec11 100644
--- a/LinkedInt.py
+++ b/LinkedInt.py
@@ -8,472 +8,585 @@
# --- Addition of Hunter for e-mail prediction
-#!/usr/bin/python
+#!/usr/bin/env python3
+from bs4 import BeautifulSoup
+from Crypto.Cipher import AES
import socket
import sys
import re
-import time
+import os
import requests
-import subprocess
import json
import argparse
import smtplib
import dns.resolver
-import cookielib
-import os
-import urllib
-import math
-import urllib2
-import string
-from bs4 import BeautifulSoup
-from thready import threaded
-
-reload(sys)
-sys.setdefaultencoding('utf-8')
-
-""" Setup Argument Parameters """
-parser = argparse.ArgumentParser(description='Discovery LinkedIn')
-parser.add_argument('-u', '--keywords', help='Keywords to search')
-parser.add_argument('-o', '--output', help='Output file (do not include extentions)')
-args = parser.parse_args()
-api_key = "" # Hunter API key
-username = "" # enter username here
-password = "" # enter password here
-
-if api_key == "" or username == "" or password == "":
- print "[!] Oops, you did not enter your api_key, username, or password in LinkedInt.py"
- sys.exit(0)
-
-def login():
- cookie_filename = "cookies.txt"
- cookiejar = cookielib.MozillaCookieJar(cookie_filename)
- opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),urllib2.HTTPHandler(debuglevel=0),urllib2.HTTPSHandler(debuglevel=0),urllib2.HTTPCookieProcessor(cookiejar))
- page = loadPage(opener, "https://www.linkedin.com/")
- parse = BeautifulSoup(page, "html.parser")
-
- csrf = parse.find(id="loginCsrfParam-login")['value']
-
- login_data = urllib.urlencode({'session_key': username, 'session_password': password, 'loginCsrfParam': csrf})
- page = loadPage(opener,"https://www.linkedin.com/uas/login-submit", login_data)
-
- parse = BeautifulSoup(page, "html.parser")
- cookie = ""
-
- try:
- cookie = cookiejar._cookies['.www.linkedin.com']['/']['li_at'].value
- except:
- sys.exit(0)
-
- cookiejar.save()
- os.remove(cookie_filename)
- return cookie
-
-def loadPage(client, url, data=None):
- try:
- response = client.open(url)
- except:
- print "[!] Cannot load main LinkedIn page"
- try:
- if data is not None:
- response = client.open(url, data)
- else:
- response = client.open(url)
- return ''.join(response.readlines())
- except:
- sys.exit(0)
-
-def get_search():
-
- body = ""
- csv = []
- css = """
-
- """
-
- header = """
-
- | Photo |
- Name |
- Possible Email: |
- Job |
- Location |
-
- """
-
- # Do we want to automatically get the company ID?
-
-
- if bCompany:
- if bAuto:
- # Automatic
- # Grab from the URL
- companyID = 0
- url = "https://www.linkedin.com/voyager/api/typeahead/hits?q=blended&query=%s" % search
- headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
- cookies['JSESSIONID'] = 'ajax:0397788525211216808'
- r = requests.get(url, cookies=cookies, headers=headers)
- content = json.loads(r.text)
- firstID = 0
- for i in range(0,len(content['elements'])):
- try:
- companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
- if firstID == 0:
- firstID = companyID
- print "[Notice] Found company ID: %s" % companyID
- except:
- continue
- companyID = firstID
- if companyID == 0:
- print "[WARNING] No valid company ID found in auto, please restart and find your own"
- else:
- # Don't auto, use the specified ID
- companyID = bSpecific
-
- print
-
- print "[*] Using company ID: %s" % companyID
-
- # Fetch the initial page to get results/page counts
- if bCompany == False:
- url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=0" % search
- else:
- url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=0" % (companyID)
-
- print url
-
- headers = {'Csrf-Token':'ajax:0397788525211216808', 'X-RestLi-Protocol-Version':'2.0.0'}
- cookies['JSESSIONID'] = 'ajax:0397788525211216808'
- #print url
- r = requests.get(url, cookies=cookies, headers=headers)
- content = json.loads(r.text)
- data_total = content['elements'][0]['total']
-
- # Calculate pages off final results at 40 results/page
- pages = data_total / 40
-
- if pages == 0:
- pages = 1
-
- if data_total % 40 == 0:
- # Becuase we count 0... Subtract a page if there are no left over results on the last page
- pages = pages - 1
-
- if pages == 0:
- print "[!] Try to use quotes in the search name"
- sys.exit(0)
-
- print "[*] %i Results Found" % data_total
- if data_total > 1000:
- pages = 25
- print "[*] LinkedIn only allows 1000 results. Refine keywords to capture all data"
- print "[*] Fetching %i Pages" % pages
- print
-
- for p in range(pages):
- # Request results for each page using the start offset
- if bCompany == False:
- url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List()&keywords=%s&origin=OTHER&q=guided&start=%i" % (search, p*40)
+import urllib3
+
+PROXY_SETTING = {} #{"http": "http://localhost:8081", "https": "http://localhost:8081"}
+
+SSL_VERIFY = False
+
+PREFIX_CHOICES = ["auto", "full", "firstlast", "firstmlast", "flast", "first", "first.last", "fmlast", "lastfirst"]
+
+class CookieDumper():
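+    """Dump LinkedIn cookies from the local Chrome profile (Windows only) by decrypting them with the DPAPI-protected key from Local State."""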
+ cookie_path = ''
+ local_state = ''
+ def __init__(self):
+ if os.name != 'nt':
+ print ('Only Windows OS is supported at this time')
+ sys.exit(1)
+ self.cookie_path = os.path.join(os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Default\\Network\\Cookies')
+ if not os.path.exists(self.cookie_path):
+ print ('Could not find Cookie file. Please ensure you have Chrome installed and have logged in to LinkedIn')
+ sys.exit(1)
+ self.local_state = os.path.join(os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Local State')
+ if not os.path.exists(self.local_state):
+ print ('Could not find Local State file. Please ensure you have Chrome installed and have logged in to LinkedIn')
+ sys.exit(1)
+
+ @staticmethod
+ def generate_cipher(aes_key, iv):
+ return AES.new(aes_key, AES.MODE_GCM, iv)
+
+ @staticmethod
+ def decrypt_payload(cipher, payload):
+ return cipher.decrypt(payload)
+
+ def decrypt_value(self, key, data):
+ try:
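+            # Chrome 80+ cookie values are laid out as a 3-byte version tag ('v10'),
+            # a 12-byte AES-GCM nonce, the ciphertext and a 16-byte authentication tag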
+ iv = data[3:15]
+ payload = data[15:]
+ cipher = self.generate_cipher(key, iv)
+ decrypted_pass = self.decrypt_payload(cipher, payload)
+ decrypted_pass = decrypted_pass[:-16].decode()
+ return decrypted_pass
+ except Exception as e:
+ return ""
+
+ def grab_cookies(self):
+ # First we need to decrypt the Local State key
+ with open(self.local_state, 'r') as f:
+ js = json.load(f)
+ import base64
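+        # os_crypt.encrypted_key is base64('DPAPI' + key blob); drop the 5-byte prefix
+        # before handing the blob to CryptUnprotectData below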
+ state_key = base64.b64decode(js['os_crypt']['encrypted_key'])[5:]
+
+ # use ctypes to decrypt DPAPI protected state key
+ from ctypes import windll, Structure, POINTER, c_char, byref, c_buffer, cdll
+ from ctypes.wintypes import DWORD
+ CryptUnprotectData = windll.crypt32.CryptUnprotectData
+ LocalFree = windll.kernel32.LocalFree
+ memcpy = cdll.msvcrt.memcpy
+
+ class DATA_BLOB(Structure):
+ _fields_ = [("cbData", DWORD), ("pbData", POINTER(c_char))]
+
+ blobOut = DATA_BLOB()
+ bufferIn = c_buffer(state_key, len(state_key))
+ blobIn = DATA_BLOB(len(state_key), bufferIn)
+ CryptUnprotectData(byref(blobIn), None, None, None, None, 0, byref(blobOut))
+ cbData = int(blobOut.cbData)
+ pbData = blobOut.pbData
+ buffer = c_buffer(cbData)
+ memcpy(buffer, pbData, cbData)
+        LocalFree(pbData)
+ key = buffer.raw
+
+ # now grab the cookies from the db
+ import sqlite3, shutil, datetime, uuid
+ class FakeObj():
+            def close(self):
+ pass
+ cursor = FakeObj()
+ conn = FakeObj()
+ cookies = {}
+ try:
+ temp_name = os.path.join(os.getenv('TEMP'), str(uuid.uuid4()))
+ shutil.copy(self.cookie_path, temp_name)
+ conn = sqlite3.connect(temp_name)
+ cursor = conn.cursor()
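+            # expires_utc is stored as microseconds since 1601-01-01 (the Windows epoch),
+            # so shift the Unix timestamp by 11644473600 seconds before scaling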
+            now = int((datetime.datetime.now().timestamp() + 11644473600) * 1000000)
+ cursor.execute("SELECT name, encrypted_value FROM cookies WHERE host_key LIKE '%.linkedin.com' AND expires_utc > ?", [now])
+ for result in cursor.fetchall():
+ name = result[0]
+ encrypted_value = result[1]
+ decrypted = self.decrypt_value(key, encrypted_value)
+ cookies[name] = decrypted
+ except Exception as e:
+ print (f'Error dumping LinkedIn cookies: {e}')
+ finally:
+ cursor.close()
+ conn.close()
+ if os.path.exists(temp_name):
+ os.remove(temp_name)
+ return cookies
+
+
+class Scraper():
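+    """Authenticate to LinkedIn (via password or saved cookies) and scrape people search results from the Voyager API."""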
+ interactive = False
+ username = ''
+ password = ''
+ prefix = ''
+ suffix = ''
+ outfile = ''
+ validate = False
+ by_company = False
+ company_id = None
+ session = requests.Session()
+ session.proxies = PROXY_SETTING
+ session.verify = SSL_VERIFY
+    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
+
+ def __init__(self, username, password, prefix, suffix, outfile, by_company,
+ company_id=None, validate=False, user_agent='', interactive=False):
+ self.username = username
+ self.password = password
+ self.prefix = prefix
+        self.suffix = suffix
+ self.outfile = os.path.splitext(outfile)[0]
+ self.by_company = by_company
+ self.company_id = company_id
+ self.validate = validate
+ self.interactive = interactive
+ if user_agent:
+ self.user_agent = user_agent
+ self.session.headers.update({'User-Agent': self.user_agent})
+
+ def authenticate(self):
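+        """Log in with username and password, handling the e-mail PIN checkpoint when interactive mode is enabled."""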
+ r = self.session.get("https://www.linkedin.com/uas/login")
+ parse = BeautifulSoup(r.text, "html.parser")
+ csrf = parse.find("input", {"name": "loginCsrfParam"})['value']
+
+ login_data = {'session_key': self.username, 'session_password': self.password, 'loginCsrfParam': csrf}
+ r = self.session.post("https://www.linkedin.com/uas/login-submit",
+ data=login_data)
+ parse = BeautifulSoup(r.text, "html.parser")
+
+ if 'Security Verification' in r.text:
+
+ if 'captchaV2Challenge' in r.text:
+ print ("[!] Captcha detected. Try logging in using Chrome and then use the 'cookies' module (Windows only) to dump your cookies.")
+ print ("[!] The cookie file can then be provided to this script using the --cookies option.")
+ sys.exit(1)
+ else:
+ print ("[!] Login checkpoint hit. Check your email for a code (requires --interactive), or log in from the same IP and try again.")
+ print ("[!] Alternatively, log in using Chrome and then use the 'cookies' module (Windows only) to dump your cookies.")
+ print ("[!] The cookie file can then be provided to this script using the --cookies option.")
+ if not self.interactive:
+ sys.exit(1)
+
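+            # Replay every hidden field from the checkpoint form, adding the PIN that was e-mailed to the account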
+ pin = input("[*] Check email and enter code here: ")
+ url = 'https://www.linkedin.com/checkpoint/challenge/verify'
+ data = {}
+ data['csrfToken'] = parse.find("input", {"name": "csrfToken"})['value']
+ data['pageInstance'] = parse.find("input", {"name": "pageInstance"})['value']
+ data['resendUrl'] = parse.find("input", {"name": "resendUrl"})['value']
+ data['challengeId'] = parse.find("input", {"name": "challengeId"})['value']
+ data['language'] = parse.find("input", {"name": "language"})['value']
+ data['displayTime'] = parse.find("input", {"name": "displayTime"})['value']
+ data['challengeSource'] = parse.find("input", {"name": "challengeSource"})['value']
+ data['requestSubmissionId'] = parse.find("input", {"name": "requestSubmissionId"})['value']
+ data['challengeType'] = parse.find("input", {"name": "challengeType"})['value']
+ data['challengeData'] = parse.find("input", {"name": "challengeData"})['value']
+ data['challengeDetails'] = parse.find("input", {"name": "challengeDetails"})['value']
+ data['failureRedirectUri'] = parse.find("input", {"name": "failureRedirectUri"})['value']
+ data['flowTreeId'] = parse.find("input", {"name": "flowTreeId"})['value']
+ data['signInLink'] = parse.find("input", {"name": "signInLink"})['value']
+ data['joinNowLink'] = parse.find("input", {"name": "joinNowLink"})['value']
+ data['_s'] = parse.find("input", {"name": "_s"})['value']
+ data['pin'] = int(pin)
+ r = self.session.post(url, data=data)
+
+ return self.test_auth()
+
+ def load_cookies(self, cookie_file):
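+        """Load session cookies from a JSON file, as produced by the cookies module."""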
+ with open(cookie_file) as f:
+ self.session.cookies.update(json.load(f))
+ return self.test_auth()
+
+ def test_auth(self):
+ # preflight request in case the user doesn't have a JSESSIONID yet
+ self.session.get('https://www.linkedin.com/feed/')
+ try:
+ self.session.headers.update({'Csrf-Token': self.session.cookies.get_dict()["JSESSIONID"].strip('"')})
+ self.session.headers.update({'X-RestLi-Protocol-Version': '2.0.0'})
+ except:
+ return False
+
+ # Now we can test against the API
+ r = self.session.get('https://www.linkedin.com/voyager/api/me', allow_redirects=False)
+ success = r.status_code == 200
+ if success:
+ print ('[+] Auth success')
else:
- url = "https://www.linkedin.com/voyager/api/search/cluster?count=40&guides=List(v->PEOPLE,facetCurrentCompany->%s)&origin=OTHER&q=guided&start=%i" % (companyID, p*40)
- #print url
- r = requests.get(url, cookies=cookies, headers=headers)
- content = r.text.encode('UTF-8')
- content = json.loads(content)
- print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements']))
- for c in content['elements'][0]['elements']:
- if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
- try:
- data_industry = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['industry']
- except:
- data_industry = ""
- data_firstname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['firstName']
- data_lastname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['lastName']
- data_slug = "https://www.linkedin.com/in/%s" % c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['publicIdentifier']
- data_occupation = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['occupation']
- data_location = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['location']
+ print ('[!] Auth failed')
+ return success
+
+ def get_search(self, search):
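+        """Search LinkedIn people results and write them to <outfile>.html and <outfile>.csv."""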
+ body = ""
+ csv = []
+ css = """
+
+ """
+
+ header = """
+
+ | Photo |
+ Name |
+ Possible Email: |
+ Job |
+ LinkedIn ID |
+
+ """
+
+ # Do we want to automatically get the company ID?
+ if self.by_company and not self.company_id:
+ # Automatic
+ # Grab from the URL
+ companyID = 0
+ url = "https://www.linkedin.com/voyager/api/typeahead/hits"
+ params = {'q': 'blended', 'query': search}
+ r = self.session.get(url, params=params)
+ content = r.json()
+ firstID = 0
+
+ for i in range(0, len(content['elements'])):
try:
- data_picture = "https://media.licdn.com/mpr/mpr/shrinknp_400_400%s" % c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.voyager.common.MediaProcessorImage']['id']
+ companyID = content['elements'][i]['hitInfo']['com.linkedin.voyager.typeahead.TypeaheadCompany']['id']
+ if firstID == 0:
+ firstID = companyID
+ print ("[Notice] Found company ID: %s" % companyID)
except:
- print "[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation)
- data_picture = ""
-
- # incase the last name is multi part, we will split it down
-
- parts = data_lastname.split()
-
- name = data_firstname + " " + data_lastname
- fname = ""
- mname = ""
- lname = ""
-
- if len(parts) == 1:
- fname = data_firstname
- mname = '?'
- lname = parts[0]
- elif len(parts) == 2:
- fname = data_firstname
- mname = parts[0]
- lname = parts[1]
- elif len(parts) >= 3:
- fname = data_firstname
- lname = parts[0]
- else:
- fname = data_firstname
- lname = '?'
+ continue
+ companyID = firstID
+ print ("[*] Using company ID: %s" % companyID)
+ if companyID == 0:
+ print ("[WARNING] No valid company ID found in auto, please restart and find your own")
+ elif self.by_company:
+ # Don't auto, use the specified ID
+ companyID = self.company_id
+ print ("[*] Using company ID: %s" % companyID)
+
+ # Fetch the initial page to get results/page counts
+ url = "https://www.linkedin.com/voyager/api/search/cluster"
+ if self.by_company:
+ params = {'count': 40, 'guides': f'List(v->PEOPLE,facetCurrentCompany->{companyID})',
+ 'origin': 'OTHER', 'q': 'guided', 'start': 1}
+ else:
+ params = {'count': 40, 'guides': 'List()', 'keywords': search, 'origin': 'OTHER', 'q': 'guided', 'start': 1}
- fname = re.sub('[^A-Za-z]+', '', fname)
- mname = re.sub('[^A-Za-z]+', '', mname)
- lname = re.sub('[^A-Za-z]+', '', lname)
+ r = self.session.get(url, params=params)
+ content = r.json()
+ data_total = content['elements'][0]['total']
- if len(fname) == 0 or len(lname) == 0:
- # invalid user, let's move on, this person has a weird name
- continue
+ # Calculate pages off final results at 40 results/page
+            pages = data_total // 40
- #come here
+ if pages == 0:
+ pages = 1
- if prefix == 'full':
- user = '{}{}{}'.format(fname, mname, lname)
- if prefix == 'firstlast':
- user = '{}{}'.format(fname, lname)
- if prefix == 'firstmlast':
- user = '{}{}{}'.format(fname, mname[0], lname)
- if prefix == 'flast':
- user = '{}{}'.format(fname[0], lname)
- if prefix == 'first.last':
- user = '{}.{}'.format(fname, lname)
- if prefix == 'fmlast':
- user = '{}{}{}'.format(fname[0], mname[0], lname)
- if prefix == 'lastfirst':
- user = '{}{}'.format(lname, fname)
-
- email = '{}@{}'.format(user, suffix)
-
- body += "" \
- " | " \
- "%s | " \
- "%s | " \
- "%s | " \
- "%s | " \
- "" % (data_slug, data_picture, data_slug, name, email, data_occupation, data_location)
- if validateEmail(suffix,email):
- csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";")))
- foot = "
"
- f = open('{}.html'.format(outfile), 'wb')
- f.write(css)
- f.write(header)
- f.write(body)
- f.write(foot)
- f.close()
- f = open('{}.csv'.format(outfile), 'wb')
- f.writelines('\n'.join(csv))
- f.close()
+ if data_total % 40 == 0:
+ # Because we count 0... Subtract a page if there are no left over results on the last page
+ pages = pages - 1
+
+ if pages == 0:
+ print ("[!] Try to use quotes in the search name")
+ sys.exit(0)
+
+ print ("[*] %i Results Found" % data_total)
+ if data_total > 1000:
+ pages = 25
+ print ("[*] LinkedIn only allows 1000 results. Refine keywords to capture all data")
+ print ("[*] Fetching %i Pages" % pages)
+
+ for p in range(1, pages):
+ # Request results for each page using the start offset
+ if self.by_company:
+ params = {'count': 40, 'guides': f'List(v->PEOPLE,facetCurrentCompany->{companyID})',
+ 'origin': 'OTHER', 'q': 'guided', 'start': p*40}
else:
- print "[!] Headless profile found. Skipping"
- print
-
-def validateEmail(domain,email):
- """
- Functionality and Code was adapted from the SimplyEmail Project: https://github.com/SimplySecurity/SimplyEmail
- """
- #Setting Variables
- UserAgent = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
- mxhost = ""
- FinalList = []
- hostname = socket.gethostname()
+ params = {'count': 40, 'guides': 'List()', 'keywords': search, 'origin': 'OTHER', 'q': 'guided', 'start': p*40 }
+
+ r = self.session.get(url, params=params)
+ content = r.json()
+ print ("[*] Fetching page %i with %i results" % ((p), len(content['elements'][0]['elements'])))
+ for c in content['elements'][0]['elements']:
+ if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
+ try:
+ data_industry = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['industry']
+ except:
+ data_industry = ""
+
+ data_firstname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['firstName']
+ data_lastname = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['lastName']
+ data_slug = "https://www.linkedin.com/in/%s" % c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['publicIdentifier']
+ data_occupation = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['occupation']
+ data_id = c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['backendUrn'].split(":")[3]
+
+ try:
+ data_picture = "https://media.licdn.com/mpr/mpr/shrinknp_400_400%s" % c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['miniProfile']['picture']['com.linkedin.voyager.common.MediaProcessorImage']['id']
+ except:
+ print ("[*] No picture found for %s %s, %s" % (data_firstname, data_lastname, data_occupation))
+ data_picture = ""
+
+                    # in case the last name is multi-part, we will split it down
+ parts = data_lastname.split()
+ name = data_firstname + " " + data_lastname
+ fname = ""
+ mname = ""
+ lname = ""
+
+ if len(parts) == 1:
+ fname = data_firstname
+ mname = '?'
+ lname = parts[0]
+ elif len(parts) == 2:
+ fname = data_firstname
+ mname = parts[0]
+ lname = parts[1]
+ elif len(parts) >= 3:
+ fname = data_firstname
+ lname = parts[0]
+ else:
+ fname = data_firstname
+ lname = '?'
+
+ fname = re.sub('[^A-Za-z]+', '', fname)
+ mname = re.sub('[^A-Za-z]+', '', mname)
+ lname = re.sub('[^A-Za-z]+', '', lname)
+
+ if len(fname) == 0 or len(lname) == 0:
+ # invalid user, let's move on, this person has a weird name
+ continue
+
+                        # Build the e-mail local part according to the selected prefix pattern
+
+ if self.prefix == 'full':
+ user = '{}{}{}'.format(fname, mname, lname)
+ if self.prefix == 'firstlast':
+ user = '{}{}'.format(fname, lname)
+ if self.prefix == 'firstmlast':
+ user = '{}{}{}'.format(fname, mname[0], lname)
+ if self.prefix == 'flast':
+ user = '{}{}'.format(fname[0], lname)
+ if self.prefix == 'first.last':
+ user = '{}.{}'.format(fname, lname)
+ if self.prefix == 'fmlast':
+ user = '{}{}{}'.format(fname[0], mname[0], lname)
+ if self.prefix == 'lastfirst':
+ user = '{}{}'.format(lname, fname)
+
+ email = '{}@{}'.format(user, self.suffix)
+
+                        body += "<tr>" \
+                               "<td><a href=\"%s\"><img src=\"%s\" width=\"200\" height=\"200\"></a></td>" \
+                               "<td><a href=\"%s\">%s</a></td>" \
+                               "<td>%s</td>" \
+                               "<td>%s</td>" \
+                               "<td>%s</td>" \
+                               "</tr>" % (data_slug, data_picture, data_slug, name, email, data_occupation, data_id)
+
+                        if not self.validate or self.validate_email(self.suffix, email):
+                            csv.append('"%s","%s","%s","%s","%s","%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_id))
+
+                    foot = "</table>"
+ with open('{}.html'.format(self.outfile), 'wb') as f:
+ f.write(css.encode())
+ f.write(header.encode())
+ f.write(body.encode())
+ f.write(foot.encode())
+
+ with open('{}.csv'.format(self.outfile), 'wb') as f:
+                        f.write('\n'.join(csv).encode())
+ else:
+ print ("[!] Headless profile found. Skipping")
+
+    def validate_email(self, domain, email):
+ """
+ Functionality and Code was adapted from the SimplyEmail Project: https://github.com/SimplySecurity/SimplyEmail
+ """
+ #Setting Variables
+ mxhost = ""
+ FinalList = []
+ hostname = socket.gethostname()
- #Getting MX Record
- MXRecord = []
- try:
- print ' [*] Attempting to resolve MX records!'
- answers = dns.resolver.query(domain, 'MX')
- for rdata in answers:
- data = {
- "Host": str(rdata.exchange),
- "Pref": int(rdata.preference),
- }
- MXRecord.append(data)
- # Now find the lowest value in the pref
- Newlist = sorted(MXRecord, key=lambda k: k['Pref'])
- # Set the MX record
- mxhost = Newlist[0]
- val = ' [*] MX Host: ' + str(mxhost['Host'])
- print val
- except Exception as e:
- error = ' [!] Failed to get MX record: ' + str(e)
- print error
-
- #Checking Email Address
- socket.setdefaulttimeout(10)
- server = smtplib.SMTP(timeout=10)
- server.set_debuglevel(0)
- try:
- print " [*] Checking for valid email: " + str(email)
- server.connect(mxhost['Host'])
- server.helo(hostname)
- server.mail('email@gmail.com')
- code,message = server.rcpt(str(email))
- server.quit()
- except Exception as e:
- print e
+ #Getting MX Record
+ MXRecord = []
+ try:
+ print (' [*] Attempting to resolve MX records!')
+ answers = dns.resolver.query(domain, 'MX')
+ for rdata in answers:
+ data = {
+ "Host": str(rdata.exchange),
+ "Pref": int(rdata.preference),
+ }
+ MXRecord.append(data)
+ # Now find the lowest value in the pref
+ Newlist = sorted(MXRecord, key=lambda k: k['Pref'])
+ # Set the MX record
+ mxhost = Newlist[0]
+ val = ' [*] MX Host: ' + str(mxhost['Host'])
+ print (val)
+ except Exception as e:
+ error = ' [!] Failed to get MX record: ' + str(e)
+ print (error)
+
+ #Checking Email Address
+ socket.setdefaulttimeout(10)
+ server = smtplib.SMTP(timeout=10)
+ server.set_debuglevel(0)
+ try:
+ print (" [*] Checking for valid email: " + str(email))
+ server.connect(mxhost['Host'])
+ server.helo(hostname)
+ server.mail('email@gmail.com')
+ code, message = server.rcpt(str(email))
+ server.quit()
+        except Exception as e:
+            print (e)
+            return False
- if code == 250:
- #print "Valid Email Address Found: %s" % email
- return True
- else:
- #print "Email not valid %s" % email
+ if code == 250:
+ #print ("Valid Email Address Found: %s" % email
+ return True
+ else:
+ #print ("Email not valid %s" % email
+ return False
+
+
+def lookup_prefix(api_key, suffix):
+ #if auto prefix then we want to use hunter IO to find it.
+ url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key)
+ r = requests.get(url, proxies=PROXY_SETTING, verify=SSL_VERIFY)
+ content = r.json()
+
+ if "status" in content:
+ print ("[!] Rate limited by Hunter IO Key")
return False
+ prefix = content['data']['pattern']
+ print ("[!] %s" % prefix)
+ if prefix:
+ prefix = prefix.replace("{","").replace("}", "")
+ if prefix in PREFIX_CHOICES:
+ print ("[+] Found %s prefix" % prefix)
+ return prefix
+ return False
+
def banner():
- with open('banner.txt', 'r') as f:
- data = f.read()
-
- print "\033[1;31m%s\033[0;0m" % data
- print "\033[1;34mProviding you with Linkedin Intelligence"
- print "\033[1;32mAuthor: Vincent Yiu (@vysec, @vysecurity)\033[0;0m"
- print "\033[1;32mOriginal version by @DisK0nn3cT\033[0;0m"
-
-def authenticate():
- try:
- a = login()
- print a
- session = a
- if len(session) == 0:
- sys.exit("[!] Unable to login to LinkedIn.com")
- print "[*] Obtained new session: %s" % session
- cookies = dict(li_at=session)
- except Exception, e:
- sys.exit("[!] Could not authenticate to linkedin. %s" % e)
- return cookies
+ with open('banner.txt', 'rb') as f:
+ data = f.read()
-if __name__ == '__main__':
- banner()
- # Prompt user for data variables
- search = args.keywords if args.keywords!=None else raw_input("[*] Enter search Keywords (use quotes for more percise results)\n")
- print
- outfile = args.output if args.output!=None else raw_input("[*] Enter filename for output (exclude file extension)\n")
- print
- while True:
- bCompany = raw_input("[*] Filter by Company? (Y/N): \n")
- if bCompany.lower() == "y" or bCompany.lower() == "n":
- break
- else:
- print "[!] Incorrect choice"
+ print ("\033[1;31m%s\033[0;0m" % data.decode())
+ print ("\033[1;34mProviding you with Linkedin Intelligence")
+ print ("\033[1;32mAuthor: Vincent Yiu (@vysec, @vysecurity)\033[0;0m")
+ print ("\033[1;32mOriginal version by @DisK0nn3cT\033[0;0m")
- if bCompany.lower() == "y":
- bCompany = True
- else:
- bCompany = False
-
- bAuto = True
- bSpecific = 0
- prefix = ""
- suffix = ""
-
- print
-
- if bCompany:
- while True:
- bSpecific = raw_input("[*] Specify a Company ID (Provide ID or leave blank to automate): \n")
- if bSpecific != "":
- bAuto = False
- if bSpecific != 0:
- try:
- int(bSpecific)
- break
- except:
- print "[!] Incorrect choice, the ID either has to be a number or blank"
-
- else:
- print "[!] Incorrect choice, the ID either has to be a number or blank"
- else:
- bAuto = True
- break
-
- print
+if __name__ == '__main__':
+ banner()
+ parser = argparse.ArgumentParser(description='Discovery LinkedIn')
+ parsers = parser.add_subparsers(dest='subparser_name', title='module')
+ parsers.required = True
- while True:
- suffix = raw_input("[*] Enter e-mail domain suffix (eg. contoso.com): \n")
- suffix = suffix.lower()
- if "." in suffix:
- break
+ scrape_parser = parsers.add_parser("scrape", help="Scrape LinkedIn for a target company or keyword")
+ scrape_parser.add_argument('-u', '--username', help='Username')
+ scrape_parser.add_argument('-p', '--password', help='Password')
+ scrape_parser.add_argument('-c', '--cookies', help='Cookie file to use (dump with cookies module)')
+ scrape_parser.add_argument('-a', '--api-key', help='Hunter API Key', required=False)
+    scrape_parser.add_argument('-s', '--search', help='Search Keywords (use quotes for more precise results)', required=True)
+ scrape_parser.add_argument('-b', '--by-company', help='Filter by Company', action=argparse.BooleanOptionalAction)
+ scrape_parser.add_argument('-v', '--validate', help='Validate e-mails', action=argparse.BooleanOptionalAction)
+ scrape_parser.add_argument('-i', '--company-id', help='Company ID', required=False)
+ scrape_parser.add_argument('--suffix', help='Suffix for e-mail generation (e.g. example.com)', required=True)
+ scrape_parser.add_argument('--prefix', help='Prefix for e-mail generation', default='auto', choices=PREFIX_CHOICES)
+    scrape_parser.add_argument('-o', '--output', help='Output file (do not include extensions)', required=True)
+ scrape_parser.add_argument('--user-agent', help='Custom User-Agent', default='')
+ scrape_parser.add_argument('--interactive', help='Interactive prompt', action=argparse.BooleanOptionalAction)
+
+ cookie_parser = parsers.add_parser("cookies", help="Extracts LinkedIn Cookies from Chrome for use with this script.")
+    cookie_parser.add_argument('-o', '--output', help='Output file (do not include extensions)', required=True)
+
+ args = parser.parse_args()
+
+ # Disable HTTPS warnings from requests >= 2.16.0 library
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+ if args.subparser_name == 'scrape':
+
+ if args.prefix == "auto":
+ if not args.api_key:
+                print ("[!] No API key given. Please provide one with the --api-key parameter.")
+ sys.exit(1)
+ prefix = lookup_prefix(args.api_key, args.suffix)
+ if not prefix:
+ print ("[!] Automatic prefix search failed, please insert a manual choice")
+ sys.exit(1)
else:
- print "[!] Incorrect e-mail? There's no dot"
-
- print
-
- while True:
- prefix = raw_input("[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,first.last,fmlast,lastfirst): \n")
- prefix = prefix.lower()
- print
- if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
- break
- elif prefix == "auto":
- #if auto prefix then we want to use hunter IO to find it.
- print "[*] Automaticly using Hunter IO to determine best Prefix"
- url = "https://hunter.io/trial/v2/domain-search?offset=0&domain=%s&format=json" % suffix
- r = requests.get(url)
- content = json.loads(r.text)
- if "status" in content:
- print "[!] Rate limited by Hunter IO trial"
- url = "https://api.hunter.io/v2/domain-search?domain=%s&api_key=%s" % (suffix, api_key)
- #print url
- r = requests.get(url)
- content = json.loads(r.text)
- if "status" in content:
- print "[!] Rate limited by Hunter IO Key"
- continue
- #print content
- prefix = content['data']['pattern']
- print "[!] %s" % prefix
- if prefix:
- prefix = prefix.replace("{","").replace("}", "")
- if prefix == "full" or prefix == "firstlast" or prefix == "firstmlast" or prefix == "flast" or prefix =="first" or prefix == "first.last" or prefix == "fmlast" or prefix == "lastfirst":
- print "[+] Found %s prefix" % prefix
- break
- else:
- print "[!] Automatic prefix search failed, please insert a manual choice"
- continue
+ prefix = args.prefix
+
+ if args.cookies and (args.username or args.password):
+ print ('[!] Specify either a username/password combo OR a cookie file')
+ sys.exit(1)
+    elif not args.cookies and not (args.username and args.password):
+        print ('[!] Please provide both a username and a password, or a cookie file')
+ sys.exit(1)
+
+ scraper = Scraper(args.username, args.password, prefix, args.suffix, args.output,
+ args.by_company, args.company_id, args.validate, args.user_agent, args.interactive)
+
+    # Skip password authentication when a cookie file is supplied
+ if args.cookies:
+ if os.path.exists(args.cookies):
+ print (f'[*] Loading cookie file: {args.cookies}')
+ if not scraper.load_cookies(args.cookies):
+ print ('[!] Cookies are invalid. Login using Chrome and dump again.')
+ sys.exit(1)
else:
- print "[!] Automatic prefix search failed, please insert a manual choice"
- continue
- else:
- print "[!] Incorrect choice, please select a value from (auto,full,firstlast,firstmlast,flast,first.last,fmlast)"
+ print ('[!] Cookie file not found')
+ sys.exit(1)
+ elif not scraper.authenticate():
+ print ('Failed password authentication')
+ sys.exit(1)
+
+ scraper.get_search(args.search)
+ print ("[+] Complete")
- print
+ elif args.subparser_name == 'cookies':
+ dumper = CookieDumper()
+ cookies = dumper.grab_cookies()
+ if not cookies:
+ print ("Cookie dumping failed")
+ sys.exit(1)
+ outfile = os.path.splitext(args.output)[0] + '.json'
+ with open(outfile, 'w') as f:
+ json.dump(cookies, f)
+ print (f"[+] Cookie dump success. Saved to {outfile}")
-
- # URL Encode for the querystring
- search = urllib.quote_plus(search)
- cookies = authenticate()
-
-
- # Initialize Scraping
- get_search()
-
- print "[+] Complete"
+ else:
+ parser.print_help()
diff --git a/README.md b/README.md
index d5ea3c2..fc386b0 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,9 @@ Original Scraper by Danny Chrastil (@DisK0nn3cT): https://github.com/DisK0nn3cT/
Modified by @vysecurity
# Requirements
+
```
-pip install beautifulsoup4
-pip install thready
+pip install -r requirements.txt
```
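+
+This pulls in the packages the script imports (roughly `beautifulsoup4`, `requests`, `dnspython`, `pycryptodome` and `urllib3`), which can also be installed directly if preferred:
+
+```
+pip install beautifulsoup4 requests dnspython pycryptodome urllib3
+```
+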
# Change Log
@@ -18,10 +18,24 @@ Additions:
* Constrain to company filters
* Addition of Hunter for e-mail prediction
+[v0.2 BETA 06-10-2022]
+* Ported to Python 3
+* Added `requirements.txt`
+* Refactored the code to a re-usable class and modularised the arg parsing
+* Options are now given on the command line so the tool can be run unattended
+* Fixed a bug in the paging calculation that caused a 403
+* Removed Location field as it's not exposed by the API
+* Added LinkedIn ID field
+* Added a `cookies` module which can be used to bypass Captcha by first logging in with Chrome and then dumping cookies (Windows only)
+* Added option to supply a verification code if one is required on login (when `--interactive` is used)
+* Made email validation optional
+* Added an option to supply a custom User-Agent
+
# To-Do List
* Allow for horizontal scraping and mass automated company domain, and format prediction per company
* Add Natural Language Processing techniques on titles to discover groups of similar titles to be stuck into same "department". This should then be visualised in a graph.
+* Add a cookie dumper for Linux/macOS and support Microsoft Edge
# Usage
@@ -29,9 +43,12 @@ Put in LinkedIn credentials in LinkedInt.py
Put Hunter.io API key in LinkedInt.py
Run LinkedInt.py and follow instructions
-# Example
+# Usage
+
+## Options
```
+>python LinkedInt.py -h
██╗ ██╗███╗ ██╗██╗ ██╗███████╗██████╗ ██╗███╗ ██╗████████╗
██║ ██║████╗ ██║██║ ██╔╝██╔════╝██╔══██╗██║████╗ ██║╚══██╔══╝
██║ ██║██╔██╗ ██║█████╔╝ █████╗ ██║ ██║██║██╔██╗ ██║ ██║
@@ -42,25 +59,104 @@ Run LinkedInt.py and follow instructions
Providing you with Linkedin Intelligence
Author: Vincent Yiu (@vysec, @vysecurity)
Original version by @DisK0nn3cT
-[*] Enter search Keywords (use quotes for more percise results)
-"General Motors"
+usage: LinkedInt.py [-h] {scrape,cookies} ...
+
+Discovery LinkedIn
+
+optional arguments:
+ -h, --help show this help message and exit
+
+module:
+ {scrape,cookies}
+ scrape Scrape LinkedIn for a target company or keyword
+ cookies Extracts LinkedIn Cookies from Chrome for use with this script.
+
+-- SNIP --
+
+usage: LinkedInt.py cookies [-h] -o OUTPUT
+
+optional arguments:
+ -h, --help show this help message and exit
+ -o OUTPUT, --output OUTPUT
+                        Output file (do not include extensions)
+-- SNIP --
+
+usage: LinkedInt.py scrape [-h] [-u USERNAME] [-p PASSWORD] [-c COOKIES] [-a API_KEY] -s SEARCH [-b | --by-company | --no-by-company] [-v | --validate | --no-validate] [-i COMPANY_ID] --suffix SUFFIX
+ [--prefix {auto,full,firstlast,firstmlast,flast,first,first.last,fmlast,lastfirst}] -o OUTPUT [--user-agent USER_AGENT] [--interactive | --no-interactive]
+
+optional arguments:
+ -h, --help show this help message and exit
+ -u USERNAME, --username USERNAME
+ Username
+ -p PASSWORD, --password PASSWORD
+ Password
+ -c COOKIES, --cookies COOKIES
+ Cookie file to use (dump with cookies module)
+ -a API_KEY, --api-key API_KEY
+ Hunter API Key
+ -s SEARCH, --search SEARCH
+                        Search Keywords (use quotes for more precise results)
+ -b, --by-company, --no-by-company
+ Filter by Company
+ -v, --validate, --no-validate
+ Validate e-mails
+ -i COMPANY_ID, --company-id COMPANY_ID
+ Company ID
+ --suffix SUFFIX Suffix for e-mail generation (e.g. example.com)
+ --prefix {auto,full,firstlast,firstmlast,flast,first,first.last,fmlast,lastfirst}
+ Prefix for e-mail generation
+ -o OUTPUT, --output OUTPUT
+                        Output file (do not include extensions)
+ --user-agent USER_AGENT
+ Custom User-Agent
+ --interactive, --no-interactive
+ Interactive prompt
+
+```
+
+## Example
-[*] Enter filename for output (exclude file extension)
-generalmotors
+The following example shows how to first dump your LinkedIn cookies from Chrome and then use them to carry out a search.
-[*] Filter by Company? (Y/N):
-Y
+First we need to log in to LinkedIn using Chrome (on Windows). Then we run the following:
-[*] Specify a Company ID (Provide ID or leave blank to automate):
+```
+> python .\LinkedInt.py cookies -o cookies
+██╗ ██╗███╗ ██╗██╗ ██╗███████╗██████╗ ██╗███╗ ██╗████████╗
+██║ ██║████╗ ██║██║ ██╔╝██╔════╝██╔══██╗██║████╗ ██║╚══██╔══╝
+██║ ██║██╔██╗ ██║█████╔╝ █████╗ ██║ ██║██║██╔██╗ ██║ ██║
+██║ ██║██║╚██╗██║██╔═██╗ ██╔══╝ ██║ ██║██║██║╚██╗██║ ██║
+███████╗██║██║ ╚████║██║ ██╗███████╗██████╔╝██║██║ ╚████║ ██║
+╚══════╝╚═╝╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═════╝ ╚═╝╚═╝ ╚═══╝ ╚═╝
+Providing you with Linkedin Intelligence
+Author: Vincent Yiu (@vysec, @vysecurity)
+Original version by @DisK0nn3cT
+[+] Cookie dump success. Saved to cookies.json
+```
-[*] Enter e-mail domain suffix (eg. contoso.com):
-gm.com
+Now we can load the cookies into `LinkedInt` and carry out a search:
-[*] Select a prefix for e-mail generation (auto,full,firstlast,firstmlast,flast,first.last,fmlast):
-auto
+```
+> python .\LinkedInt.py scrape -o example --no-validate -s "Example Company" --by-company --prefix "first.last" --suffix example.com --cookies .\cookies.json
+██╗ ██╗███╗ ██╗██╗ ██╗███████╗██████╗ ██╗███╗ ██╗████████╗
+██║ ██║████╗ ██║██║ ██╔╝██╔════╝██╔══██╗██║████╗ ██║╚══██╔══╝
+██║ ██║██╔██╗ ██║█████╔╝ █████╗ ██║ ██║██║██╔██╗ ██║ ██║
+██║ ██║██║╚██╗██║██╔═██╗ ██╔══╝ ██║ ██║██║██║╚██╗██║ ██║
+███████╗██║██║ ╚████║██║ ██╗███████╗██████╔╝██║██║ ╚████║ ██║
+╚══════╝╚═╝╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═════╝ ╚═╝╚═╝ ╚═══╝ ╚═╝
-[*] Automaticly using Hunter IO to determine best Prefix
-[!] {first}.{last}
-[+] Found first.last prefix
+Providing you with Linkedin Intelligence
+Author: Vincent Yiu (@vysec, @vysecurity)
+Original version by @DisK0nn3cT
+[*] Loading cookie file: .\cookies.json
+[+] Auth success
+[Notice] Found company ID: 123456
+[Notice] Found company ID: 789012
+[Notice] Found company ID: 987654
+[*] Using company ID: 123456
+[*] 1002153 Results Found
+[*] LinkedIn only allows 1000 results. Refine keywords to capture all data
+[*] Fetching 25 Pages
+[*] Fetching page 1 with 40 results
```
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4265934
Binary files /dev/null and b/requirements.txt differ