-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathuser_generator.py
More file actions
64 lines (56 loc) · 2.29 KB
/
user_generator.py
File metadata and controls
64 lines (56 loc) · 2.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Generates random users for a website
import argparse
from faker import Faker
import geocoder
from random import choice
# Command-line interface: a single option controlling how many users are generated.
parser = argparse.ArgumentParser(prog=__file__, description="Web Server Data Generator")
parser.add_argument(
    "--num_users", "-u",
    dest="num_users",
    type=int,
    default=100,
    help="The number of users to create",
)
args = parser.parse_args()
num_users = int(args.num_users)

# Shared Faker instance used for IP addresses and user agent strings.
faker = Faker()

# Faker methods, one per browser family; calling one yields a user agent string.
ualist = [
    faker.firefox,
    faker.chrome,
    faker.safari,
    faker.internet_explorer,
    faker.opera,
]

# User fields that write_csvs omits from the BigQuery output file.
sensitive_fields = ['lat', 'lng', 'ip', 'user_agent']
def generate_user():
    """
    Return a randomly generated dictionary representing a user.

    Random IPv4 addresses are drawn until geocoder resolves one to a
    latitude/longitude pair; the remaining fields are then chosen at random.

    :return: dict with the keys 'lat', 'lng', 'ip', 'user_agent',
        'age_bracket', 'opted_into_marketing' and 'id'. 'lat' and 'lng'
        are stored as strings.
    """
    user = {}
    user['lat'] = ""
    user['lng'] = ""
    # NOTE(review): this loop has no retry limit, so it keeps going for as
    # long as lookups fail (e.g. with no network access).
    while user['lat'] == "" or user['lng'] == "":
        user['ip'] = faker.ipv4()
        g = geocoder.ip(user['ip'])
        # A failed lookup may come back as None rather than an empty list
        # (assumption about geocoder -- confirm against its docs); treat
        # both as "no coordinates" so we retry instead of raising TypeError.
        coords = g.latlng or []
        if len(coords) == 2 and all(c is not None for c in coords):
            user['lat'] = str(coords[0])
            user['lng'] = str(coords[1])
    user['user_agent'] = choice(ualist)()
    user['age_bracket'] = choice(['18-25', '26-40', '41-55', '55+'])
    user['opted_into_marketing'] = choice([True, False])
    # NOTE: hash() of a str is salted per interpreter process unless
    # PYTHONHASHSEED is fixed, so the same ip/lat/lng gives a different id
    # on a different run.
    user['id'] = hash(str(user['ip']) + user['lat'] + user['lng'])
    return user
def write_csvs(users):
    """
    Write the users to two delimited files.

    "users.csv" gets a header row followed by every field of every user,
    for ingestion by an event generator. "users_bq.txt" gets no header and
    omits the fields listed in sensitive_fields, for upload to BigQuery.
    Columns are the keys of the first user in alphabetical order.

    Rows are written with the csv module so that values containing commas
    or quotes (user agent strings often do) are quoted instead of silently
    shifting the columns.

    :param users: list of user dictionaries that all share the same keys.
        An empty list produces two empty files.
    :return: None
    """
    import csv  # local import keeps this function self-contained

    # newline='' lets csv.writer control line endings; lineterminator keeps
    # the '\n' endings the files were written with before.
    with open("users.csv", 'w', newline='') as event_out, \
            open("users_bq.txt", 'w', newline='') as bq_out:
        if not users:
            # Nothing to derive the columns from; leave both files empty.
            return
        cols = sorted(users[0].keys())
        bq_cols = [col for col in cols if col not in sensitive_fields]
        event_writer = csv.writer(event_out, lineterminator='\n')
        bq_writer = csv.writer(bq_out, lineterminator='\n')
        event_writer.writerow(cols)
        for user in users:
            event_writer.writerow([str(user[key]) for key in cols])
            bq_writer.writerow([str(user[key]) for key in bq_cols])
if __name__ == '__main__':
    # Build the requested number of users, then write them to both output files.
    users = []
    for _ in range(num_users):
        users.append(generate_user())
    write_csvs(users)