11 changes: 10 additions & 1 deletion LinkedInt.py
@@ -186,6 +186,8 @@ def get_search():
print "[*] Fetching %i Pages" % pages
print

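    # Collect every scraped profile here so it can be dumped as JSON at the end of the run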
    jsondata = {'search': search, 'results': []}

    for p in range(pages):
        # Request results for each page using the start offset
        if bCompany == False:
@@ -196,6 +198,7 @@ def get_search():
        r = requests.get(url, cookies=cookies, headers=headers)
        content = r.text.encode('UTF-8')
        content = json.loads(content)
        # print content
        print "[*] Fetching page %i with %i results" % ((p), len(content['elements'][0]['elements']))
        for c in content['elements'][0]['elements']:
            if 'com.linkedin.voyager.search.SearchProfile' in c['hitInfo'] and c['hitInfo']['com.linkedin.voyager.search.SearchProfile']['headless'] == False:
@@ -274,6 +277,7 @@ def get_search():
                    "<a>" % (data_slug, data_picture, data_slug, name, email, data_occupation, data_location)

                csv.append('"%s","%s","%s","%s","%s", "%s"' % (data_firstname, data_lastname, name, email, data_occupation, data_location.replace(",", ";")))
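                # Mirror the CSV row as a structured record for the JSON output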
                jsondata['results'].append({'link': data_slug, 'picture': data_picture, 'firstname': data_firstname, 'lastname': data_lastname, 'name': name, 'email': email, 'occupation': data_occupation, 'location': data_location})
                foot = "</table></center>"
                f = open('{}.html'.format(outfile), 'wb')
                f.write(css)
@@ -284,9 +288,14 @@ def get_search():
                f = open('{}.csv'.format(outfile), 'wb')
                f.writelines('\n'.join(csv))
                f.close()

            else:
                print "[!] Headless profile found. Skipping"
                print

    # Write out JSON
    with open('{}.json'.format(outfile), 'wb') as f:
        json.dump(jsondata, f)

def banner():
    with open('banner.txt', 'r') as f:
@@ -417,4 +426,4 @@ def authenticate():
# Initialize Scraping
get_search()

print "[+] Complete"
print "[+] Complete"
54 changes: 54 additions & 0 deletions parsejson.py
@@ -0,0 +1,54 @@
#!/usr/bin/python
# Parse JSON results and guess departments

import json, argparse, re, collections

parser = argparse.ArgumentParser(description='LinkedInt output parser')
parser.add_argument('-i', '--infile', help='JSON results file produced by LinkedInt')
args = parser.parse_args()

with open(args.infile) as data_file:
    data = json.load(data_file)

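# Department buckets, tried in order: the first regex that matches wins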
groups = collections.OrderedDict([
    ('C Level', ['chief executive', r'(\W|^)C[A-Z]O(\W|$)']),
    ('HR', ['recruitment']),
    ('Sales and Marketing', ['accounts? manager', 'sales', 'marketing']),
    ('Delivery', ['analyst', 'consultant', 'technician', 'developer', 'programmer', 'tester', 'assurance']),
    ('Administrative', ['project manager', 'project delivery', 'receptionist', r'assistant', '^PA ']),
    ('Directors', ['director'])
])

categorised = {}

print 'Search: ' + data['search']

for d in data['results']:
    category = None
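    # Strip the trailing ' at <company>' so only the job title is matched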
    occ = re.sub(' at ' + data['search'] + '.*', '', d['occupation'])
    for groupname, g in groups.iteritems():
        if category: continue
        for regex in g:
            if category: continue
            if re.search(regex, occ, re.IGNORECASE):
                print d['name'] + ' is in ' + groupname + ' (' + regex + ') "' + occ + '"'
                category = groupname
    if not category:
        category = 'Unknown'

    if category not in categorised:
        categorised[category] = []

    categorised[category].append(d)

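# Print the categorised people, preserving the group order defined above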
keys = groups.keys()
keys.append('Unknown')
for g in keys:
    if g not in categorised: continue
    people = categorised[g]
    print ''
    print '\033[1m' + g + '\033[0m'
    for p in people:
        print p['name']
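
A quick usage sketch, assuming LinkedInt was run with an output filename of results so that it wrote results.json (the names and titles below are illustrative):

    $ python parsejson.py -i results.json
    Search: Example Corp
    Jane Doe is in Delivery (analyst) "Security Analyst"

    Delivery
    Jane Doe

Each profile is announced as it matches, showing which regex fired and the occupation string after the company suffix was stripped; the per-group listing at the end follows the order of the groups dict, with unmatched profiles collected under Unknown.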