-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_processing.py
More file actions
138 lines (107 loc) · 5.17 KB
/
data_processing.py
File metadata and controls
138 lines (107 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Data processing functions for OpenDSS analysis
"""
import os
import pandas as pd
from file_readers import read_loadfiles, read_pvfiles, read_data, read_solar_data, read_peak_loadfiles
def process_timeseries_loads(dataset, region, scenario):
    """Process timeseries load data for one region/scenario.

    Walks the scenario's ``opendss`` tree to locate ``Loads.dss`` and
    ``PVSystems.dss`` files, reads the load profiles (parquet) and solar
    profiles (csv) they reference, and aggregates the region-wide kW/kvar
    timeseries plus the peak-load timestamp.

    Parameters
    ----------
    dataset : str
        Root folder of the dataset.
    region : str
        Region sub-folder name.
    scenario : str
        Scenario sub-folder name.

    Returns
    -------
    tuple
        (all_load_mappings, all_pv_mappings, profile_data, pv_profile_data,
        total_load, max_time, peak_total_day, peak_total_hour,
        peak_total_min, region_all_kw, all_kvar)
    """
    file_paths = []
    file_paths_pv = []
    opendss_root = os.path.join(dataset, region, 'scenarios', scenario, 'opendss')
    for root, _dirs, files in os.walk(opendss_root):
        if 'Loads.dss' in files:
            file_paths.append(os.path.join(root, 'Loads.dss'))
        if 'PVSystems.dss' in files:
            file_paths_pv.append(os.path.join(root, 'PVSystems.dss'))
    enduse_folder = os.path.join(dataset, region, 'load_data')
    solar_folder = os.path.join(dataset, region, 'solar_data')

    print('Reading Loads.dss files')
    print('Reading timeseries loads')
    # Merge the per-file {load_name: (profile, count)} mappings into one dict.
    all_load_mappings = {}
    for load_mapping in (read_loadfiles(fp) for fp in file_paths):
        all_load_mappings.update(load_mapping)
    # Count how many times each load profile occurs across all loads.
    all_parquet_files = {}
    for profile, count in all_load_mappings.values():
        all_parquet_files[profile] = all_parquet_files.get(profile, 0) + count
    # Each profile's parquet is read once; its data is later weighted by count.
    result = [read_data(os.path.join(enduse_folder, profile + '.parquet'))
              for profile in all_parquet_files]

    print('Reading PVSystems.dss files')
    # Merge the per-file {pv_name: profile} mappings into one dict.
    all_pv_mappings = {}
    for pv_mapping in (read_pvfiles(fp) for fp in file_paths_pv):
        all_pv_mappings.update(pv_mapping)
    # Count occurrences of each solar profile (each PV system counts once).
    all_csv_files = {}
    for profile in all_pv_mappings.values():
        all_csv_files[profile] = all_csv_files.get(profile, 0) + 1
    result_pv = [read_solar_data(os.path.join(solar_folder, profile + '_full.csv'))
                 for profile in all_csv_files]

    # Weight each load profile by its occurrence count and collect the
    # per-profile kW (values[0]) and kvar (values[1]) series.
    region_all_kw = []
    all_kvar = []
    profile_data = {}
    pv_profile_data = {}
    for profile, values in result:
        profile_data[profile] = values
        region_all_kw.append(values[0] * all_parquet_files[profile])
        all_kvar.append(values[1] * all_parquet_files[profile])
    for profile, values in result_pv:
        pv_profile_data[profile] = values

    total_load = sum(region_all_kw)
    max_time = total_load.idxmax()
    # The series is at 15-minute resolution (4 intervals per hour): convert
    # the flat interval index into 1-based day, hour-of-day, and minute.
    peak_total_day = int(max_time / 4 / 24) + 1
    peak_total_hour = int(max_time / 4) - (peak_total_day - 1) * 24
    peak_total_min = (max_time % 4) * 15
    return (all_load_mappings, all_pv_mappings, profile_data, pv_profile_data,
            total_load, max_time, peak_total_day, peak_total_hour, peak_total_min,
            region_all_kw, all_kvar)
def process_peak_loads(dataset, region, scenario):
    """Process peak load data for one region/scenario.

    Reads every feeder-level ``Loads.dss`` under the scenario's ``opendss``
    folder and aggregates (kw_total, kw_res, kw_com) at three levels:
    region-wide under key ``('', '')``, per substation under
    ``(substation, '')``, and per feeder under ``(substation, feeder)``.

    Parameters
    ----------
    dataset : str
        Root folder of the dataset.
    region : str
        Region sub-folder name.
    scenario : str
        Scenario sub-folder name.

    Returns
    -------
    dict
        Maps (substation, feeder) tuples to (kw_total, kw_res, kw_com).
    """
    peak_folder = os.path.join(dataset, region, 'scenarios', scenario, 'opendss')
    file_paths = []
    for substation in os.listdir(peak_folder):
        sub_path = os.path.join(peak_folder, substation)
        # Skip non-directories and the special folders that are not substations.
        if not os.path.isdir(sub_path) or substation in ('subtransmission', 'analysis'):
            continue
        for feeder in os.listdir(sub_path):
            loads_file = os.path.join(sub_path, feeder, 'Loads.dss')
            if feeder != 'analysis' and os.path.exists(loads_file):
                file_paths.append((loads_file, substation, feeder))

    print('Reading Loads.dss files')
    # Sequential processing
    result_loads = [read_peak_loadfiles(fp) for fp in file_paths]

    peak_loads = {}

    def _accumulate(key, kws):
        # Element-wise add kws into the running (total, res, com) triple.
        prev = peak_loads.get(key, (0, 0, 0))
        peak_loads[key] = tuple(a + b for a, b in zip(prev, kws))

    for kw_total, kw_res, kw_com, substation, feeder in result_loads:
        kws = (kw_total, kw_res, kw_com)
        _accumulate(('', ''), kws)            # region-wide total
        _accumulate((substation, ''), kws)    # substation subtotal
        peak_loads[(substation, feeder)] = kws
    return peak_loads
def get_all_feeders(base_folder):
    """Return every (substation, feeder) key found under *base_folder*.

    The list starts with the region-wide key ``('', '')``; each substation
    directory contributes ``(substation, '')`` followed by
    ``(substation, feeder)`` for each of its feeder directories. The special
    folders ``'subtransmission'`` and ``'analysis'`` are excluded at the
    substation level, and ``'analysis'`` at the feeder level.

    Parameters
    ----------
    base_folder : str
        The opendss folder containing per-substation directories.

    Returns
    -------
    list of tuple
        (substation, feeder) pairs, including the '' aggregate keys.
    """
    all_feeders = [('', '')]
    for substation in os.listdir(base_folder):
        sub_path = os.path.join(base_folder, substation)
        # Guard clause: skip files and the special non-substation folders.
        if not os.path.isdir(sub_path) or substation in ('subtransmission', 'analysis'):
            continue
        all_feeders.append((substation, ''))
        for feeder in os.listdir(sub_path):
            if feeder != 'analysis' and os.path.isdir(os.path.join(sub_path, feeder)):
                all_feeders.append((substation, feeder))
    return all_feeders