import boto3
import botocore
import json
import pandas as pd
import utils.load_data_util
# Widen pandas' display limits so that a dataframe is shown in one view
# (up to 500 columns and 50000 rows, without wrapping the frame repr).
for _option, _value in (
    ('display.max_columns', 500),
    ('display.expand_frame_repr', False),
    ('display.max_rows', 50000),
):
    pd.set_option(_option, _value)
# Module-level S3 resource handle.
# NOTE(review): presumably relies on boto3's default session/credential
# resolution — confirm the runtime environment provides credentials.
s3 = boto3.resource('s3')
# Helper function to trim the json files into a proper json format
def process_string(data: str) -> str:
    """Drop the first and last characters of ``data`` and wrap the rest in ``[]``.

    Args:
        data: Raw text whose first and last characters are to be discarded.

    Returns:
        ``"["`` + the interior of ``data`` + ``"]"``. Inputs of length two or
        less have no interior, so the result is ``"[]"``.
    """
    # data[1:-1] is the interior slice: everything except the two end characters.
    return "[" + data[1:-1] + "]"
# Helper function to count the occurrences of each value of a given key
def count_key(data, key, key_value_count):
    """Tally how often each value of ``key`` occurs across the items of ``data``.

    The tally is accumulated in place into ``key_value_count``, so repeated
    calls with the same mapping keep adding to the existing counts.

    Args:
        data: Iterable of items that support ``item[key]`` lookup.
        key: The key to look up in every item.
        key_value_count: Mutable mapping from value to running count; updated
            in place.

    Returns:
        None. The result is delivered through ``key_value_count``.

    Raises:
        KeyError: If an item in ``data`` is a dict that lacks ``key``.
    """
    for site in data:
        key_value = site[key]
        # Start from 0 the first time a value is seen, then increment.
        key_value_count[key_value] = key_value_count.get(key_value, 0) + 1
# Load a random sample of the data via the project loader.
# NOTE(review): the meaning of the argument 50 is defined by
# utils.load_data_util.load_random_data — confirm against that helper.
result = utils.load_data_util.load_random_data(50)

# Distinct values found in the `arguments` column of the sample.
unique_args = result.arguments.unique()

# Write every distinct argument value to uniqueArgs.txt, one per line, as
# UTF-8 bytes. `count` ends up as the number of values written and stays 0
# when there are none.
count = 0
with open("uniqueArgs.txt", "wb") as f:
    for count, arg in enumerate(unique_args, start=1):
        f.write((str(arg) + "\n").encode("utf-8"))

# Group the sample by `symbol` and count the entries of every other column.
# NOTE(review): presumably `result` is a pandas DataFrame, in which case
# count() tallies non-null entries per group — confirm.
grouped_by_symbol = result.groupby(['symbol']).count()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
arguments |
call_stack |
crawl_id |
file_number |
func_name |
in_iframe |
location |
operation |
script_col |
script_line |
script_loc_eval |
script_url |
time_stamp |
value |
| symbol |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| CanvasRenderingContext2D.fillRect |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| CanvasRenderingContext2D.fillStyle |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| CanvasRenderingContext2D.textBaseline |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| HTMLCanvasElement.getContext |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
| HTMLCanvasElement.height |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| HTMLCanvasElement.style |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| HTMLCanvasElement.width |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| RTCPeerConnection.iceGatheringState |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| RTCPeerConnection.idpLoginUrl |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| RTCPeerConnection.localDescription |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| RTCPeerConnection.onicecandidate |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| RTCPeerConnection.onremovestream |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| RTCPeerConnection.peerIdentity |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| RTCPeerConnection.remoteDescription |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| RTCPeerConnection.signalingState |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| window.Storage.getItem |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
182 |
| window.Storage.key |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
| window.Storage.length |
0 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
| window.Storage.removeItem |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
35 |
| window.Storage.setItem |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
49 |
| window.document.cookie |
0 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
479 |
| window.localStorage |
0 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
94 |
| window.name |
0 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
31 |
| window.navigator.appCodeName |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| window.navigator.appName |
0 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
20 |
| window.navigator.appVersion |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| window.navigator.cookieEnabled |
0 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
14 |
| window.navigator.language |
0 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
21 |
| window.navigator.mimeTypes[application/futuresplash].type |
0 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
4 |
| window.navigator.mimeTypes[application/x-shockwave-flash].type |
0 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
3 |
| window.navigator.onLine |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| window.navigator.platform |
0 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
23 |
| window.navigator.plugins[Shockwave Flash].description |
0 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
39 |
| window.navigator.plugins[Shockwave Flash].filename |
0 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
| window.navigator.plugins[Shockwave Flash].length |
0 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
9 |
| window.navigator.plugins[Shockwave Flash].name |
0 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
10 |
| window.navigator.plugins[Shockwave Flash].version |
0 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
| window.navigator.product |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| window.navigator.productSub |
0 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| window.navigator.userAgent |
0 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
258 |
| window.navigator.vendor |
0 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
7 |
| window.navigator.vendorSub |
0 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| window.screen.colorDepth |
0 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
22 |
| window.screen.pixelDepth |
0 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
5 |
| window.sessionStorage |
0 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
65 |
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
crawl_id |
file_number |
in_iframe |
| crawl_id |
NaN |
NaN |
NaN |
| file_number |
NaN |
1.000000 |
0.137485 |
| in_iframe |
NaN |
0.137485 |
1.000000 |