11 changes: 10 additions & 1 deletion LinkedInt.py
@@ -186,6 +186,8 @@ def get_search():
print "[*] Fetching %i Pages" % pages
print

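    # Collect every scraped profile here so it can be dumped as JSON at the end of the run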
    jsondata = {'search': search, 'results': []}

    for p in range(pages):
        # Request results for each page using the start offset
        if bCompany == False:
@@ -196,6 +198,7 @@ def get_search():
        r = requests.get(url, cookies=cookies, headers=headers)
        content = r.text.encode('UTF-8')
        content = json.loads(content)
        # print content
        print "[*] Fetching page %i with %i results" % ((p), len(content['elements'][0]['elements']))
        for c in content['elements'][0]['elements']:
            if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
@@ -274,6 +277,7 @@ def get_search():
                    "<a>" % (data_slug, data_picture, data_slug, name, email, data_occupation, data_location)

                csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",", ";")))
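                # Mirror the CSV row as a structured record for the JSON output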
                jsondata['results'].append({'link': data_slug, 'picture': data_picture, 'firstname': data_firstname, 'lastname': data_lastname, 'name': name, 'email': email, 'occupation': data_occupation, 'location': data_location})
                foot = "</table></center>"
                f = open('{}.html'.format(outfile), 'wb')
                f.write(css)
@@ -284,9 +288,14 @@ def get_search():
                f = open('{}.csv'.format(outfile), 'wb')
                f.writelines('\n'.join(csv))
                f.close()

            else:
                print "[!] Headless profile found. Skipping"
                print

    # Write out JSON
    with open('{}.json'.format(outfile), 'wb') as f:
        json.dump(jsondata, f)

def banner():
    with open('banner.txt', 'r') as f:
@@ -417,4 +426,4 @@ def authenticate():
# Initialize Scraping
get_search()

print "[+] Complete"
print "[+] Complete"
54 changes: 54 additions & 0 deletions parsejson.py
@@ -0,0 +1,54 @@
#!/usr/bin/python
# Parse JSON results and guess departments

import json, argparse, re, collections

parser = argparse.ArgumentParser(description='LinkedInt output parser')
parser.add_argument('-i', '--infile', help='JSON results file produced by LinkedInt')
args = parser.parse_args()

with open(args.infile) as data_file:
    data = json.load(data_file)

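# Department buckets, tried in order: the first regex that matches wins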
groups = collections.OrderedDict([
    ('C Level', ['chief executive', r'(\W|^)C[A-Z]O(\W|$)']),
    ('HR', ['recruitment']),
    ('Sales and Marketing', ['accounts? manager', 'sales', 'marketing']),
    ('Delivery', ['analyst', 'consultant', 'technician', 'developer', 'programmer', 'tester', 'assurance']),
    ('Administrative', ['project manager', 'project delivery', 'receptionist', r'assistant', '^PA ']),
    ('Directors', ['director'])
])

categorised = {}

print 'Search: ' + data['search']

for d in data['results']:
    category = None
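    # Strip the trailing ' at <company>' so only the job title is matched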
    occ = re.sub(' at ' + data['search'] + '.*', '', d['occupation'])
    for groupname, g in groups.iteritems():
        if category: continue
        for regex in g:
            if category: continue
            if re.search(regex, occ, re.IGNORECASE):
                print d['name'] + ' is in ' + groupname + ' (' + regex + ') "' + occ + '"'
                category = groupname
    if not category:
        category = 'Unknown'

    if category not in categorised:
        categorised[category] = []

    categorised[category].append(d)

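# Print the categorised people, preserving the group order defined above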
keys = groups.keys()
keys.append('Unknown')
for g in keys:
    if g not in categorised: continue
    people = categorised[g]
    print ''
    print '\033[1m' + g + '\033[0m'
    for p in people:
        print p['name']
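
A quick usage sketch, assuming LinkedInt was run with an output filename of results so that it wrote results.json (the names and titles below are illustrative):

    $ python parsejson.py -i results.json
    Search: Example Corp
    Jane Doe is in Delivery (analyst) "Security Analyst"

    Delivery
    Jane Doe

Each profile is announced as it matches, showing which regex fired and the occupation string after the company suffix was stripped; the per-group listing at the end follows the order of the groups dict, with unmatched profiles collected under Unknown.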