From ebc71eb243bf60f43cc70153870fe5841ad42262 Mon Sep 17 00:00:00 2001 From: strawp Date: Thu, 13 Jul 2017 13:54:52 +0100 Subject: [PATCH 1/2] Added json output --- LinkedInt.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/LinkedInt.py b/LinkedInt.py index 45bbc11..04dbffb 100644 --- a/LinkedInt.py +++ b/LinkedInt.py @@ -186,6 +186,8 @@ def get_search(): print "[*] Fetching %i Pages" % pages print + jsondata = {'search': search, 'results':[]} + for p in range(pages): # Request results for each page using the start offset if bCompany == False: @@ -196,6 +198,7 @@ def get_search(): r = requests.get(url, cookies=cookies, headers=headers) content = r.text.encode('UTF-8') content = json.loads(content) + # print content print "[*] Fetching page %i with %i results" % ((p),len(content['elements'][0]['elements'])) for c in content['elements'][0]['elements']: if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False: @@ -274,6 +277,7 @@ def get_search(): "" % (data_slug, data_picture, data_slug, name, email, data_occupation, data_location) csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",",";"))) + jsondata['results'].append({'link': data_slug, 'picture': data_picture, 'firstname': data_firstname, 'lastname': data_lastname, 'name': name, 'email': email, 'occupation': data_occupation, 'location': data_location}) foot = "" f = open('{}.html'.format(outfile), 'wb') f.write(css) @@ -284,9 +288,14 @@ def get_search(): f = open('{}.csv'.format(outfile), 'wb') f.writelines('\n'.join(csv)) f.close() + else: print "[!] Headless profile found. 
#!/usr/bin/python
"""Parse LinkedInt JSON results and guess which department each person is in.

Usage: parsejson.py -i RESULTS.json

The input must hold a ``search`` string and a ``results`` list whose entries
carry ``name`` and ``occupation``.  Each occupation is matched against the
regexes in ``GROUPS``; the first group that matches wins, and people who match
nothing are filed under "Unknown".
"""
import argparse
import collections
import json
import re

# Department name -> regexes tried (case-insensitively) against an occupation.
# Order matters: groups are tried top to bottom and the first hit wins, so
# "Sales Director" lands in "Sales and Marketing", not "Directors".
GROUPS = collections.OrderedDict([
    ('C Level', ['chief executive', r'(\W|^)C[A-Z]O(\W|$)']),
    ('HR', ['recruitment']),
    ('Sales and Marketing', ['accounts? manager', 'sales', 'marketing']),
    ('Delivery', ['analyst', 'consultant', 'technician', 'developer',
                  'programmer', 'tester', 'assurance']),
    ('Administrative', ['project manager', 'project delivery', 'receptionist',
                        r'assistant', '^PA ']),
    ('Directors', ['director']),
])

# Bucket for people whose occupation matches no pattern in GROUPS.
UNKNOWN = 'Unknown'


def _guess_group(occupation):
    """Return ``(group, regex)`` for the first pattern matching *occupation*.

    Returns ``(None, None)`` when no pattern in ``GROUPS`` matches.
    """
    for group, patterns in GROUPS.items():
        for pattern in patterns:
            if re.search(pattern, occupation, re.IGNORECASE):
                return group, pattern
    return None, None


def main(argv=None):
    """Categorise the people in a results file and print them per department.

    Args:
        argv: Command-line arguments without the program name.  ``None``
            makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: On invalid or missing command-line arguments (argparse).
        IOError/OSError: If the input file cannot be opened.
        ValueError: If the input file is not valid JSON.
        KeyError: If ``search``, ``results`` or a person's ``name`` is missing.
    """
    parser = argparse.ArgumentParser(description='LinkedInt output parser')
    parser.add_argument('-i', '--infile', required=True,
                        help='JSON results file to categorise')
    args = parser.parse_args(argv)

    with open(args.infile) as data_file:
        data = json.load(data_file)

    search = data['search']
    print('Search: ' + search)

    # Occupations look like "<role> at <search term> ..."; drop the employer
    # part so it cannot trigger a department match.  The search term is
    # literal text, so it is escaped before being spliced into the regex
    # (names such as "C++" or "Acme (UK)" contain regex metacharacters).
    employer_suffix = re.compile(' at ' + re.escape(search) + '.*')

    categorised = collections.defaultdict(list)
    for person in data['results']:
        # A missing or null occupation is treated as empty text.
        occ = employer_suffix.sub('', person.get('occupation') or '')
        group, pattern = _guess_group(occ)
        if group is None:
            group = UNKNOWN
        else:
            print(person['name'] + ' is in ' + group
                  + ' (' + pattern + ') "' + occ + '"')
        categorised[group].append(person)

    # Report in GROUPS order with the unknowns last, skipping empty groups.
    for group in list(GROUPS) + [UNKNOWN]:
        if group not in categorised:
            continue
        print('')
        print('\033[1m' + group + '\033[0m')  # ANSI bold heading
        for person in categorised[group]:
            print(person['name'])


if __name__ == '__main__':
    main()