# replication-scripts/mn2018.py at main · MEDSL/replication-scripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 28 23:46:38 2021

@author: darsh
"""
import pandas as pd
import csv

# Absolute path of the auto-adapted 2018 MN precinct CSV that this script cleans.
path = '/Users/darsh/Documents/GitHub/2018-precincts/MN/2021adapt/2018-mn-precinct-autoadapted.csv'
# The delimiter is passed by keyword: pandas 2.0+ only accepts the file path
# positionally, so the old positional ',' raises a TypeError there.
df = pd.read_csv(path, sep=',')

#some values aren't capitalized: upper-case every string cell, leave other types untouched
df = df.applymap(lambda x: x.upper() if isinstance(x, str) else x)

def district(x):
    """Normalise one district value to the zero-padded form.

    The value is converted to ``str`` first, then:

    * a purely numeric value shorter than three characters is left-padded
      with zeros to three digits (``'7'`` -> ``'007'``);
    * a court value such as ``'7, COURT 16'`` (and not containing
      ``'STATEWIDE'``) gets its leading number padded to three digits
      (``'7, COURT 16'`` -> ``'007, COURT 16'``);
    * anything else is returned unchanged (as a string).

    Parameters
    ----------
    x : any
        Raw district value; it is passed through ``str()``.

    Returns
    -------
    str
        The normalised district string.
    """
    x = str(x)
    if len(x) < 3:
        return x.zfill(3) if x.isdigit() else x

    if ', COURT ' in x and 'STATEWIDE' not in x:
        number, separator, court = x.partition(', COURT ')
        if number.isdigit():
            # Pad to width 3 rather than blindly prepending '00', so a
            # two-digit district gives '010, ...' instead of '0010, ...'.
            return number.zfill(3) + separator + court
        # Non-numeric prefix: keep the historical '00' prefix behaviour.
        return ''.join(['00', x])

    return x
# Normalise the district value of every row.
df['district'] = df['district'].map(district)

# Sanity check: print the distinct district values that contain ', COURT '.
check = df[df['district'].str.contains(', COURT ')]
print(check['district'].unique())
#-----------------------------------------------------------------------------------------------
# #find candidates with '(' in nicknames etc)
# check = df.loc[df['candidate'].str.contains('\)')]
# print(check['candidate'].unique())

def candidate(x):
    """Standardise one candidate name string.

    Periods and commas are stripped first. If the stripped name exactly
    matches one of the known special cases (the write-in label, nicknames,
    a parenthesised suffix) its fixed replacement is returned. Otherwise
    every parenthesis is turned into a double quote.

    Parameters
    ----------
    x : str
        Raw candidate name.

    Returns
    -------
    str
        The cleaned candidate name.
    """
    #remove '.' and ','
    stripped = x.replace('.', '').replace(',', '')

    # Exact-match rewrites, keyed on the punctuation-stripped name.
    special_cases = {
        #rename write in
        '[WRITE-IN]': 'WRITEIN',
        #nicknames
        'CALVIN (CAL) K BAHR': 'CALVIN K "CAL" BAHR',
        'JOHN BACHMAN (JOHNNY B)': 'JOHN "JOHNNY B" BACHMAN',
        'BARBARA MCFADDEN (KRAEMER)': 'BARBARA "KRAEMER" MCFADDEN',
        'MICHAEL D CARR (JR)': 'MICHAEL D CARR JR',
        "HEIDI 'BLY' JONES": 'HEIDI "BLY" JONES',
        'KAREN "KARRIE" S KELLY': 'KAREN S "KARRIE" KELLY',
        'KATHRYN "KATIE" M NORBY': 'KATHRYN M "KATIE" NORBY',
        'ROBERT "BOB" ARTHUR HARWARTH': 'ROBERT ARTHUR "BOB" HARWARTH',
    }
    if stripped in special_cases:
        return special_cases[stripped]

    #other 'similar names' in QA check all belong to different races
    # Generic case: any remaining parenthesis becomes a double quote.
    return stripped.replace(')', '"').replace('(', '"')
# Clean the candidate name of every row.
df['candidate'] = df['candidate'].map(candidate)

#-----------------------------------------------------------------------------------------------
#office name 'JUDGE' is district court judge. The district column has info in same format for these
#example: '7, COURT 16' is 16th court of the 7th district. These values can remain, only change office

def office(x):
    """Rename the bare office label 'JUDGE' to 'DISTRICT COURT JUDGE'.

    Any other value is returned unchanged.
    """
    return 'DISTRICT COURT JUDGE' if x == 'JUDGE' else x
# Rewrite the office label of every row.
df['office'] = df['office'].map(office)
#-----------------------------------------------------------------------------------------------
def dataverse(office, dv):
    """Pick the dataverse value for one row.

    Rows whose office is 'DISTRICT COURT JUDGE' are forced to 'STATE';
    every other row keeps the dataverse value it already had (``dv``).
    """
    if office != 'DISTRICT COURT JUDGE':
        return dv
    return 'STATE'
# Row-wise pass: derive each row's dataverse from its office and its current dataverse.
df['dataverse'] = df.apply(lambda row: dataverse(row['office'], row['dataverse']), axis=1)

def party(x):
    """Collapse 'INDEPENDENT' and 'UNAFFILIATED' into 'NONPARTISAN'.

    Any other value is returned unchanged.
    """
    if x in ('INDEPENDENT', 'UNAFFILIATED'):
        return 'NONPARTISAN'
    return x

# Recode the detailed party label of every row.
df['party_detailed'] = df['party_detailed'].map(party)
#-----------------------------------------------------------------------------------------------
#get fips code dataframe for MN counties
county_fips_path = '/Users/darsh/Documents/GitHub/2020-precincts/help-files/county-fips-codes.csv'
fips = pd.DataFrame(pd.read_csv(county_fips_path, delimiter=',', header=0))
# Keep only the rows whose state column is 'Minnesota'.
fips = fips[fips['state'] == 'Minnesota']
# county name -> county FIPS code lookup.
# NOTE: the name `fips` is re-bound to a function further down the file, so the
# lookup dict has to be built here while `fips` still refers to the table.
fips_dict = dict(zip(fips['county_name'], fips['county_fips']))

def fips(x):
    """Return the FIPS code for county name ``x`` as a string.

    'ST. LOUIS' is looked up under the spelling 'SAINT LOUIS'. The code is
    read from the module-level ``fips_dict``; a name that is not a key of
    that dict raises ``KeyError``.
    """
    lookup_name = 'SAINT LOUIS' if x == 'ST. LOUIS' else x
    return str(fips_dict[lookup_name])

# Look up the FIPS code for each row's county name.
df['county_fips'] = df['county_name'].map(fips)
# jurisdiction_fips is a straight copy of county_fips.
df['jurisdiction_fips'] = df['county_fips']
#-----------------------------------------------------------------------------------------------
def string(x):
    """Render a boolean flag as the text 'TRUE' or 'FALSE'.

    Parameters
    ----------
    x : any
        Flag value. Values equal to ``True`` / ``False`` (including 1 / 0)
        are converted; strings and anything else are passed through.

    Returns
    -------
    'TRUE', 'FALSE', or ``x`` itself when it is not a recognised boolean.
    """
    # A cell that is already text (e.g. 'TRUE' after the frame was
    # upper-cased) must be kept; previously it fell through and became None.
    if isinstance(x, str):
        return x
    # `==` rather than `is` so that numpy booleans and 1/0 still match.
    if x == True:
        return 'TRUE'
    if x == False:
        return 'FALSE'
    # Unrecognised value (e.g. missing data): return it instead of None.
    return x

# Convert both flag columns with string(), writein first and then special.
for flag_column in ('writein', 'special'):
    df[flag_column] = df[flag_column].apply(string)
#-----------------------------------------------------------------------------------------------
# Columns that hold the same constant for every row, added in this order:
#   date         - all for the 2018 general election under 'stage', being the 11/6 midterm election
#   magnitude    - unique office values show only 1 winner
#   readme_check - set to 'FALSE' for every row
constant_columns = {
    'date': '2018-11-06',
    'magnitude': '1',
    'readme_check': 'FALSE',
}
for column_name, constant in constant_columns.items():
    df[column_name] = constant

# Write the cleaned frame without the index, quoting every non-numeric field.
df.to_csv('2018-mn-precinct-final.csv', index=False, quoting=csv.QUOTE_NONNUMERIC)