-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_processing.py
More file actions
138 lines (107 loc) · 5.17 KB
/
data_processing.py
File metadata and controls
138 lines (107 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Data processing functions for OpenDSS analysis
"""
import os
import pandas as pd
from file_readers import read_loadfiles, read_pvfiles, read_data, read_solar_data, read_peak_loadfiles
def process_timeseries_loads(dataset, region, scenario):
    """Process timeseries load data for one region/scenario.

    Walks the scenario's ``opendss`` tree to locate ``Loads.dss`` and
    ``PVSystems.dss`` files, reads the load profiles (parquet) and solar
    profiles (csv) they reference, and aggregates the region-wide kW/kvar
    timeseries plus the peak-load timestamp.

    Parameters
    ----------
    dataset : str
        Root folder of the dataset.
    region : str
        Region sub-folder name.
    scenario : str
        Scenario sub-folder name.

    Returns
    -------
    tuple
        (all_load_mappings, all_pv_mappings, profile_data, pv_profile_data,
        total_load, max_time, peak_total_day, peak_total_hour,
        peak_total_min, region_all_kw, all_kvar)
    """
    file_paths = []
    file_paths_pv = []
    opendss_root = os.path.join(dataset, region, 'scenarios', scenario, 'opendss')
    for root, _dirs, files in os.walk(opendss_root):
        if 'Loads.dss' in files:
            file_paths.append(os.path.join(root, 'Loads.dss'))
        if 'PVSystems.dss' in files:
            file_paths_pv.append(os.path.join(root, 'PVSystems.dss'))
    enduse_folder = os.path.join(dataset, region, 'load_data')
    solar_folder = os.path.join(dataset, region, 'solar_data')

    print('Reading Loads.dss files')
    print('Reading timeseries loads')
    # Merge the per-file {load_name: (profile, count)} mappings into one dict.
    all_load_mappings = {}
    for load_mapping in (read_loadfiles(fp) for fp in file_paths):
        all_load_mappings.update(load_mapping)
    # Count how many times each load profile occurs across all loads.
    all_parquet_files = {}
    for profile, count in all_load_mappings.values():
        all_parquet_files[profile] = all_parquet_files.get(profile, 0) + count
    # Each profile's parquet is read once; its data is later weighted by count.
    result = [read_data(os.path.join(enduse_folder, profile + '.parquet'))
              for profile in all_parquet_files]

    print('Reading PVSystems.dss files')
    # Merge the per-file {pv_name: profile} mappings into one dict.
    all_pv_mappings = {}
    for pv_mapping in (read_pvfiles(fp) for fp in file_paths_pv):
        all_pv_mappings.update(pv_mapping)
    # Count occurrences of each solar profile (each PV system counts once).
    all_csv_files = {}
    for profile in all_pv_mappings.values():
        all_csv_files[profile] = all_csv_files.get(profile, 0) + 1
    result_pv = [read_solar_data(os.path.join(solar_folder, profile + '_full.csv'))
                 for profile in all_csv_files]

    # Weight each load profile by its occurrence count and collect the
    # per-profile kW (values[0]) and kvar (values[1]) series.
    region_all_kw = []
    all_kvar = []
    profile_data = {}
    pv_profile_data = {}
    for profile, values in result:
        profile_data[profile] = values
        region_all_kw.append(values[0] * all_parquet_files[profile])
        all_kvar.append(values[1] * all_parquet_files[profile])
    for profile, values in result_pv:
        pv_profile_data[profile] = values

    total_load = sum(region_all_kw)
    max_time = total_load.idxmax()
    # The series is at 15-minute resolution (4 intervals per hour): convert
    # the flat interval index into 1-based day, hour-of-day, and minute.
    peak_total_day = int(max_time / 4 / 24) + 1
    peak_total_hour = int(max_time / 4) - (peak_total_day - 1) * 24
    peak_total_min = (max_time % 4) * 15
    return (all_load_mappings, all_pv_mappings, profile_data, pv_profile_data,
            total_load, max_time, peak_total_day, peak_total_hour, peak_total_min,
            region_all_kw, all_kvar)
def process_peak_loads(dataset, region, scenario):
    """Process peak load data for one region/scenario.

    Reads every feeder-level ``Loads.dss`` under the scenario's ``opendss``
    folder and aggregates (kw_total, kw_res, kw_com) at three levels:
    region-wide under key ``('', '')``, per substation under
    ``(substation, '')``, and per feeder under ``(substation, feeder)``.

    Parameters
    ----------
    dataset : str
        Root folder of the dataset.
    region : str
        Region sub-folder name.
    scenario : str
        Scenario sub-folder name.

    Returns
    -------
    dict
        Maps (substation, feeder) tuples to (kw_total, kw_res, kw_com).
    """
    peak_folder = os.path.join(dataset, region, 'scenarios', scenario, 'opendss')
    file_paths = []
    for substation in os.listdir(peak_folder):
        sub_path = os.path.join(peak_folder, substation)
        # Skip non-directories and the special folders that are not substations.
        if not os.path.isdir(sub_path) or substation in ('subtransmission', 'analysis'):
            continue
        for feeder in os.listdir(sub_path):
            loads_file = os.path.join(sub_path, feeder, 'Loads.dss')
            if feeder != 'analysis' and os.path.exists(loads_file):
                file_paths.append((loads_file, substation, feeder))

    print('Reading Loads.dss files')
    # Sequential processing
    result_loads = [read_peak_loadfiles(fp) for fp in file_paths]

    peak_loads = {}

    def _accumulate(key, kws):
        # Element-wise add kws into the running (total, res, com) triple.
        prev = peak_loads.get(key, (0, 0, 0))
        peak_loads[key] = tuple(a + b for a, b in zip(prev, kws))

    for kw_total, kw_res, kw_com, substation, feeder in result_loads:
        kws = (kw_total, kw_res, kw_com)
        _accumulate(('', ''), kws)            # region-wide total
        _accumulate((substation, ''), kws)    # substation subtotal
        peak_loads[(substation, feeder)] = kws
    return peak_loads
def get_all_feeders(base_folder):
    """Return every (substation, feeder) key found under *base_folder*.

    The list starts with the region-wide key ``('', '')``; each substation
    directory contributes ``(substation, '')`` followed by
    ``(substation, feeder)`` for each of its feeder directories. The special
    folders ``'subtransmission'`` and ``'analysis'`` are excluded at the
    substation level, and ``'analysis'`` at the feeder level.

    Parameters
    ----------
    base_folder : str
        The opendss folder containing per-substation directories.

    Returns
    -------
    list of tuple
        (substation, feeder) pairs, including the '' aggregate keys.
    """
    all_feeders = [('', '')]
    for substation in os.listdir(base_folder):
        sub_path = os.path.join(base_folder, substation)
        # Guard clause: skip files and the special non-substation folders.
        if not os.path.isdir(sub_path) or substation in ('subtransmission', 'analysis'):
            continue
        all_feeders.append((substation, ''))
        for feeder in os.listdir(sub_path):
            if feeder != 'analysis' and os.path.isdir(os.path.join(sub_path, feeder)):
                all_feeders.append((substation, feeder))
    return all_feeders