From 808fc10b91ff3fa32320c23bfc5eb14cf38e5462 Mon Sep 17 00:00:00 2001 From: DARYA GUETTLER Date: Fri, 3 Oct 2025 11:38:18 -0400 Subject: [PATCH 1/6] update to add solar as a negative end use --- epengine/models/inference.py | 111 ++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 35 deletions(-) diff --git a/epengine/models/inference.py b/epengine/models/inference.py index 4e29f2e..bb2d443 100644 --- a/epengine/models/inference.py +++ b/epengine/models/inference.py @@ -61,7 +61,8 @@ ) from epengine.models.transforms import CategoricalFeature, RegressorInputSpec -END_USES = ("Lighting", "Equipment", "DomesticHotWater", "Heating", "Cooling") +END_USES = ("Lighting", "Equipment", "DomesticHotWater", "Heating", "Cooling", "Solar") +RAW_END_USES = ("Lighting", "Equipment", "DomesticHotWater", "Heating", "Cooling") OIL_HEATING_SYSTEMS = ["OilHeating"] NG_HEATING_SYSTEMS = ["NaturalGasHeating", "NaturalGasCondensingHeating"] INCOME_BRACKETS = [ @@ -85,7 +86,7 @@ _has_warned_max_solar_capacity_cap = False DATASET_SEGMENT_MAP = { - "Raw": END_USES, + "Raw": RAW_END_USES, "EndUse": END_USES, "Fuel": FUELS, "EndUseCost": END_USES, @@ -189,22 +190,16 @@ def serialized(self) -> BaseModel: # noqa: C901 field_datas = {} percentile_mapper = {v[1]: v[0] for v in PERCENTILES.values()} - # Create copies of summary dataframes with percentile mapper applied costs_summary_renamed = self.costs_summary.rename(index=percentile_mapper) - # paybacks_summary_renamed = self.paybacks_summary.rename(index=percentile_mapper) - # Process all numeric columns in the costs dataframe for col in self.costs.columns: - # Skip non-numeric columns (e.g., metadata objects) if not pd.api.types.is_numeric_dtype(self.costs[col]): continue - # Skip detailed cost columns like cost.Trigger.Final (keep only cost.Trigger) if col.startswith("cost.") and col.count(".") > 1: continue col_name = col.split(".")[-1] field_specs[col_name] = (SummarySpec, Field(title=col)) - # Get summary data for this 
column if col in costs_summary_renamed.columns: field_data = costs_summary_renamed.loc[:, col].to_dict() else: @@ -295,6 +290,16 @@ def create_end_use_disaggregation_spec(SummarySpec: type[SummarySpecBase]): ) +def create_raw_end_use_disaggregation_spec(SummarySpec: type[SummarySpecBase]): + """Create a raw end use disaggregation spec WITHOUT Solar as a field.""" + fields = {} + for end_use in RAW_END_USES: + fields[end_use] = (SummarySpec, Field(title=end_use)) + return create_model( + "RawEndUseDisaggregationSpec", **fields, __config__=ConfigDict(extra="forbid") + ) + + def create_fuel_disaggregation_spec(SummarySpec: type[SummarySpecBase]): """Create a fuel disaggregation spec with the fuels as fields.""" fields = {} @@ -306,12 +311,16 @@ def create_fuel_disaggregation_spec(SummarySpec: type[SummarySpecBase]): def create_disaggregation_spec( - EndUseDisaggregationSpec: type[BaseModel], FuelDisaggregationSpec: type[BaseModel] + EndUseDisaggregationSpec: type[BaseModel], + RawEndUseDisaggregationSpec: type[BaseModel], + FuelDisaggregationSpec: type[BaseModel], ): """Create a disaggregation spec with the datasets as fields.""" fields = {} for dataset, dataset_segments in DATASET_SEGMENT_MAP.items(): - if dataset_segments == END_USES: + if dataset == "Raw": + fields[dataset] = (RawEndUseDisaggregationSpec, Field(title=dataset)) + elif dataset_segments == END_USES: fields[dataset] = (EndUseDisaggregationSpec, Field(title=dataset)) elif dataset_segments == FUELS: fields[dataset] = (FuelDisaggregationSpec, Field(title=dataset)) @@ -377,9 +386,10 @@ def create_sbem_inference_savings_response_spec( SummarySpec = create_summary_spec() EndUseDisaggregationSpec = create_end_use_disaggregation_spec(SummarySpec) +RawEndUseDisaggregationSpec = create_raw_end_use_disaggregation_spec(SummarySpec) FuelDisaggregationSpec = create_fuel_disaggregation_spec(SummarySpec) DisaggregationSpec = create_disaggregation_spec( - EndUseDisaggregationSpec, FuelDisaggregationSpec + 
EndUseDisaggregationSpec, RawEndUseDisaggregationSpec, FuelDisaggregationSpec ) DisaggregationsSpec = create_disaggregations_spec(DisaggregationSpec) TotalSpec = create_total_spec(SummarySpec) @@ -1701,21 +1711,43 @@ def separate_fuel_based_end_uses( ) gas = pd.concat( - [heat_gas, cool_gas, dhw_gas, lighting * 0, equipment * 0], + [ + heat_gas, + cool_gas, + dhw_gas, + lighting * 0, + equipment * 0, + ], axis=1, - keys=["Heating", "Cooling", "Domestic Hot Water", "Lighting", "Equipment"], + keys=[ + "Heating", + "Cooling", + "Domestic Hot Water", + "Lighting", + "Equipment", + ], )[df_end_uses.columns] oil = pd.concat( - [heat_oil, cool_oil, dhw_oil, lighting * 0, equipment * 0], + [ + heat_oil, + cool_oil, + dhw_oil, + lighting * 0, + equipment * 0, + ], axis=1, - keys=["Heating", "Cooling", "Domestic Hot Water", "Lighting", "Equipment"], + keys=[ + "Heating", + "Cooling", + "Domestic Hot Water", + "Lighting", + "Equipment", + ], )[df_end_uses.columns] # Store actual electricity consumption for solar calculations self._actual_electricity_consumption = actual_electricity_consumption - # Use net electricity consumption for the main fuel disaggregation - df_disaggregated_fuels = pd.concat( [actual_electricity_consumption, net_electricity_consumption, gas, oil], axis=1, @@ -1755,7 +1787,14 @@ def compute_costs( keys=["Electricity", "NetElectricity", "NaturalGas", "Oil"], names=["Fuel", "EndUse"], ) - end_use_costs = disaggregated_costs.T.groupby(level=["EndUse"]).sum().T + # EndUseCost should avoid double-counting raw Electricity; only include NetElectricity + fuels + allowed_end_use_costs = pd.concat( + [net_elec_costs, gas_costs, oil_costs], + axis=1, + keys=["NetElectricity", "NaturalGas", "Oil"], + names=["Fuel", "EndUse"], + ) + end_use_costs = allowed_end_use_costs.T.groupby(level=["EndUse"]).sum().T fuel_costs = disaggregated_costs.T.groupby(level=["Fuel"]).sum().T return fuel_costs, end_use_costs @@ -1817,6 +1856,11 @@ def compute_distributions(self, 
features: pd.DataFrame, results_raw: pd.DataFram results_disaggregated_fuels = self.separate_fuel_based_end_uses( df_features=features, df_end_uses=results_end_uses ) + solar_end_use_total = results_disaggregated_fuels["NetElectricity"].sum( + axis=1 + ) - results_disaggregated_fuels["Electricity"].sum(axis=1) + results_end_uses = results_end_uses.copy() + results_end_uses["Solar"] = solar_end_use_total results_fuels = results_disaggregated_fuels.T.groupby(level=["Fuel"]).sum().T results_fuel_costs, results_end_use_costs = self.compute_costs( @@ -1870,28 +1914,25 @@ def compute_distributions(self, features: pd.DataFrame, results_raw: pd.DataFram total_keys: list[str] = [] for dataset in datasets_for_totals: - if dataset == "FuelCost": - # Sum only NetElectricity + NaturalGas + Oil to avoid double-counting Electricity - if "FuelCost" in disaggregated.columns.get_level_values("Dataset"): - fc = disaggregated.loc[:, ("FuelCost", slice(None))] - # keep only the relevant fuels if present - fuels = [ + # Default: sum all segments for the dataset if present + if dataset in disaggregated.columns.get_level_values("Dataset"): + cols = disaggregated.xs(dataset, level="Dataset", axis=1) + if dataset == "FuelCost": + # Only count NetElectricity + NaturalGas + Oil for FuelCost + allowed = [ c - for c in fc.columns.get_level_values("Segment") + for c in cols.columns if c in ("NetElectricity", "NaturalGas", "Oil") ] - if fuels: - s = fc.loc[:, (slice(None), fuels)].sum(axis=1) - else: - s = pd.Series(0.0, index=disaggregated.index) + s = ( + cols.loc[:, allowed].sum(axis=1) + if len(allowed) > 0 + else pd.Series(0.0, index=disaggregated.index) + ) else: - s = pd.Series(0.0, index=disaggregated.index) + s = cols.sum(axis=1) else: - # Default behavior: sum all segments for the dataset - if dataset in disaggregated.columns.get_level_values("Dataset"): - s = disaggregated.xs(dataset, level="Dataset", axis=1).sum(axis=1) - else: - s = pd.Series(0.0, index=disaggregated.index) + s = 
pd.Series(0.0, index=disaggregated.index) total_parts.append(s) total_keys.append(dataset) From b488aaa173cde7797a2bcc196601513677ef8dd5 Mon Sep 17 00:00:00 2001 From: DARYA GUETTLER Date: Fri, 3 Oct 2025 12:17:28 -0400 Subject: [PATCH 2/6] update exclusion method --- epengine/models/inference.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/epengine/models/inference.py b/epengine/models/inference.py index bb2d443..43d634f 100644 --- a/epengine/models/inference.py +++ b/epengine/models/inference.py @@ -1794,8 +1794,16 @@ def compute_costs( keys=["NetElectricity", "NaturalGas", "Oil"], names=["Fuel", "EndUse"], ) - end_use_costs = allowed_end_use_costs.T.groupby(level=["EndUse"]).sum().T - fuel_costs = disaggregated_costs.T.groupby(level=["Fuel"]).sum().T + end_use_costs = cast( + pd.DataFrame, + allowed_end_use_costs.groupby(level="EndUse", axis=1).sum(), + ) + + if "Solar" not in end_use_costs.columns: + end_use_costs["Solar"] = 0.0 + fuel_costs = cast( + pd.DataFrame, disaggregated_costs.T.groupby(level=["Fuel"]).sum().T + ) return fuel_costs, end_use_costs @@ -1833,8 +1841,15 @@ def compute_emissions( keys=["Electricity", "NetElectricity", "NaturalGas", "Oil"], names=["Fuel", "EndUse"], ) - end_use_emissions = disaggregated_emissions.T.groupby(level=["EndUse"]).sum().T - fuel_emissions = disaggregated_emissions.T.groupby(level=["Fuel"]).sum().T + end_use_emissions = cast( + pd.DataFrame, + disaggregated_emissions.groupby(level="EndUse", axis=1).sum(), + ) + if "Solar" not in end_use_emissions.columns: + end_use_emissions["Solar"] = 0.0 + fuel_emissions = cast( + pd.DataFrame, disaggregated_emissions.T.groupby(level=["Fuel"]).sum().T + ) return fuel_emissions, end_use_emissions From aafbc1f86ee287fdb748f17f76154ae405044893 Mon Sep 17 00:00:00 2001 From: DARYA GUETTLER Date: Fri, 3 Oct 2025 12:56:44 -0400 Subject: [PATCH 3/6] update emissions calc with exclusion --- epengine/models/inference.py | 35 
+++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/epengine/models/inference.py b/epengine/models/inference.py index 43d634f..348420e 100644 --- a/epengine/models/inference.py +++ b/epengine/models/inference.py @@ -1787,20 +1787,18 @@ def compute_costs( keys=["Electricity", "NetElectricity", "NaturalGas", "Oil"], names=["Fuel", "EndUse"], ) - # EndUseCost should avoid double-counting raw Electricity; only include NetElectricity + fuels - allowed_end_use_costs = pd.concat( - [net_elec_costs, gas_costs, oil_costs], + base_end_use_costs = pd.concat( + [elec_costs, gas_costs, oil_costs], axis=1, - keys=["NetElectricity", "NaturalGas", "Oil"], + keys=["Electricity", "NaturalGas", "Oil"], names=["Fuel", "EndUse"], ) end_use_costs = cast( pd.DataFrame, - allowed_end_use_costs.groupby(level="EndUse", axis=1).sum(), + base_end_use_costs.groupby(level="EndUse", axis=1).sum(), ) - - if "Solar" not in end_use_costs.columns: - end_use_costs["Solar"] = 0.0 + solar_cost_total = net_elec_costs.sum(axis=1) - elec_costs.sum(axis=1) + end_use_costs["Solar"] = solar_cost_total fuel_costs = cast( pd.DataFrame, disaggregated_costs.T.groupby(level=["Fuel"]).sum().T ) @@ -1841,14 +1839,24 @@ def compute_emissions( keys=["Electricity", "NetElectricity", "NaturalGas", "Oil"], names=["Fuel", "EndUse"], ) + allowed_end_use_emissions = pd.concat( + [elec_emissions, gas_emissions, oil_emissions], + axis=1, + keys=["Electricity", "NaturalGas", "Oil"], + names=["Fuel", "EndUse"], + ) end_use_emissions = cast( pd.DataFrame, - disaggregated_emissions.groupby(level="EndUse", axis=1).sum(), + allowed_end_use_emissions.groupby(level="EndUse", axis=1).sum(), ) - if "Solar" not in end_use_emissions.columns: - end_use_emissions["Solar"] = 0.0 + solar_emissions_total = net_elec_emissions.sum(axis=1) - elec_emissions.sum( + axis=1 + ) + end_use_emissions["Solar"] = solar_emissions_total + fuel_emissions = cast( - pd.DataFrame, 
disaggregated_emissions.T.groupby(level=["Fuel"]).sum().T + pd.DataFrame, + disaggregated_emissions.groupby(level="Fuel", axis=1).sum(), ) return fuel_emissions, end_use_emissions @@ -1932,8 +1940,7 @@ def compute_distributions(self, features: pd.DataFrame, results_raw: pd.DataFram # Default: sum all segments for the dataset if present if dataset in disaggregated.columns.get_level_values("Dataset"): cols = disaggregated.xs(dataset, level="Dataset", axis=1) - if dataset == "FuelCost": - # Only count NetElectricity + NaturalGas + Oil for FuelCost + if dataset in ("FuelCost", "FuelEmissions"): allowed = [ c for c in cols.columns From 759c316d6bc76915163f1f561a4471a5e243a9b7 Mon Sep 17 00:00:00 2001 From: DARYA GUETTLER Date: Tue, 14 Oct 2025 10:45:40 -0400 Subject: [PATCH 4/6] update inference to correct the dependent sampling approach --- epengine/models/inference.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/epengine/models/inference.py b/epengine/models/inference.py index 348420e..5148cba 100644 --- a/epengine/models/inference.py +++ b/epengine/models/inference.py @@ -2217,24 +2217,12 @@ def run( # noqa: C901 new_features = changed_priors.sample( new_features, len(new_features), self.original.generator ) - - # Create an upgraded spec with the new semantic field context - upgraded_spec = SBEMInferenceRequestSpec( - **{ - k: v - for k, v in self.original.model_dump().items() - if k != "semantic_field_context" - }, - semantic_field_context=self.upgraded_semantic_field_context, + new_transformed_features = self.original.source_feature_transform.transform( + new_features ) - # Run inference with the upgraded spec - new_results = upgraded_spec.run(n) # Get peak results for cost calculations - new_results_raw = upgraded_spec.predict( - upgraded_spec.source_feature_transform.transform( - upgraded_spec.make_features(n)[0] - ) - ) + new_results_raw = self.original.predict(new_transformed_features) + new_results = 
self.original.compute_distributions(new_features, new_results_raw) new_results_peak = cast(pd.DataFrame, new_results_raw["Peak"]) # new_results_energy = cast(pd.DataFrame, new_results_raw["Energy"]) @@ -2269,6 +2257,15 @@ def run( # noqa: C901 # Compute features for cost calculations after inference # For solar upgrades, we need to use the ACTUAL electricity consumption (before solar) # to calculate the system size needed, not the net consumption if there is alrearyd some solar + upgraded_spec = SBEMInferenceRequestSpec( + **{ + k: v + for k, v in self.original.model_dump().items() + if k != "semantic_field_context" + }, + semantic_field_context=self.upgraded_semantic_field_context, + ) + electricity_eui = upgraded_spec._actual_electricity_consumption.sum(axis=1) # Calculate the feature distributions for solar features (yield, coverage) From 1f8b32dd06923cc8bb7e8976da5a0474a12a44c4 Mon Sep 17 00:00:00 2001 From: DARYA GUETTLER Date: Wed, 15 Oct 2025 22:57:04 -0400 Subject: [PATCH 5/6] move solar priors to make_priors --- epengine/models/data/retrofit-costs.json | 593 +++++++---------------- epengine/models/inference.py | 244 ++++++---- 2 files changed, 340 insertions(+), 497 deletions(-) diff --git a/epengine/models/data/retrofit-costs.json b/epengine/models/data/retrofit-costs.json index d6c1f1f..3cb8f3c 100644 --- a/epengine/models/data/retrofit-costs.json +++ b/epengine/models/data/retrofit-costs.json @@ -13,7 +13,7 @@ "error_scale": 0.05, "units": "$/kW", "per": "solar PV capacity", - "description": "PV installation cost per W ($3.04/W)", + "description": "D PV installation cost per W ($3.04/W)", "source": "EnergySage" } ] @@ -31,7 +31,7 @@ "error_scale": 0.05, "units": "$/kW", "per": "solar PV capacity", - "description": "PV installation cost per W ($3.04/W)", + "description": "D PV installation cost per W ($3.04/W)", "source": "EnergySage" } ] @@ -39,7 +39,7 @@ { "trigger_column": "OnsiteSolar", "initial": "NoSolarPV", - "final": "MaxSolarPV", + "final": 
"HighSolarPV", "order": ["LinearQuantity"], "quantity_factors": [ { @@ -49,7 +49,7 @@ "error_scale": 0.05, "units": "$/kW", "per": "solar PV capacity", - "description": "PV installation cost per W ($3.04/W)", + "description": "D PV installation cost per W ($3.04/W)", "source": "EnergySage" } ] @@ -69,7 +69,7 @@ "error_scale": 0.05, "units": "$/m2", "per": "gross floor area", - "description": "", + "description": "D", "source": "" } ] @@ -82,7 +82,7 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 8.83, + "coefficient": 100, "indicator_cols": [ "feature.geometry.energy_model_conditioned_area", "feature.geometry.est_fp_ratio" @@ -90,7 +90,7 @@ "error_scale": 0.05, "units": "$/m2", "per": "gross floor area", - "description": "", + "description": "D not possible", "source": "" } ] @@ -103,7 +103,7 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 8.83, + "coefficient": 100, "indicator_cols": [ "feature.geometry.energy_model_conditioned_area", "feature.geometry.est_fp_ratio" @@ -111,7 +111,7 @@ "error_scale": 0.05, "units": "$/m2", "per": "gross floor area", - "description": "", + "description": "D not possible", "source": "" } ] @@ -120,62 +120,82 @@ "trigger_column": "Weatherization", "initial": "SomewhatLeakyEnvelope", "final": "TightEnvelope", - "order": ["LinearQuantity"], + "order": ["FixedQuantity", "LinearQuantity"], "quantity_factors": [ + { + "type": "FixedQuantity", + "amount": 577, + "error_scale": 0.05, + "description": "Base intercept for air seal cost based on conditioned area", + "source": "D EVS dataset" + }, { "type": "LinearQuantity", - "coefficient": 6.35, + "coefficient": 1.345, "indicator_cols": [ "feature.geometry.energy_model_conditioned_area", "feature.geometry.est_fp_ratio" ], - "error_scale": 0.05, + "error_scale": 0.25, "units": "$/m2", "per": "gross floor area", - "description": "", + "description": "D EVS dataset", "source": "" } ] }, { "trigger_column": "Weatherization", - "initial": 
"SomewhatLeakyEnvelope", + "initial": "TightEnvelope", "final": "TightEnvelopeHRV", - "order": ["LinearQuantity"], + "order": ["FixedQuantity", "LinearQuantity"], "quantity_factors": [ + { + "type": "FixedQuantity", + "amount": 4524, + "error_scale": 0.05, + "description": "2000 ERV 2000 install, EVS intercept", + "source": "D REMDB and homewyse" + }, { "type": "LinearQuantity", - "coefficient": 6.35, + "coefficient": 1.87, "indicator_cols": [ "feature.geometry.energy_model_conditioned_area", "feature.geometry.est_fp_ratio" ], - "error_scale": 0.05, + "error_scale": 0.15, "units": "$/m2", "per": "gross floor area", - "description": "", + "description": "Upper end job. Uses EVS data above .1 ACH drop starting from below .4 ACH", "source": "" } ] }, { - "trigger_column": "RoofInsulation", - "initial": "InsulatedRoof", - "final": "HighlyInsulatedRoof", - "order": ["LinearQuantity"], + "trigger_column": "Weatherization", + "initial": "LeakyEnvelope", + "final": "SomewhatLeakyEnvelope", + "order": ["FixedQuantity", "LinearQuantity"], "quantity_factors": [ + { + "type": "FixedQuantity", + "amount": 577, + "error_scale": 0.05, + "description": "Base intercept for air seal cost based on conditioned area", + "source": "EVS dataset" + }, { "type": "LinearQuantity", - "coefficient": 129.17, + "coefficient": 1.345, "indicator_cols": [ - "feature.geometry.computed.roof_surface_area", - "feature.geometry.roof_is_flat.num", + "feature.geometry.energy_model_conditioned_area", "feature.geometry.est_fp_ratio" ], - "error_scale": 0.05, + "error_scale": 0.1, "units": "$/m2", - "per": "roof surface area", - "description": "", + "per": "gross floor area", + "description": "D EVS dataset", "source": "" } ] @@ -188,79 +208,87 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 129.17, + "coefficient": 26.9, "indicator_cols": [ - "feature.geometry.computed.roof_surface_area", - "feature.geometry.roof_is_flat.num", + "feature.geometry.computed.footprint_area", + 
"feature.geometry.roof_is_attic.num", "feature.geometry.est_fp_ratio" ], - "error_scale": 0.05, + "error_scale": 0.2, "units": "$/m2", "per": "roof surface area", "description": "", "source": "" - }, + } + ] + }, + { + "trigger_column": "RoofInsulation", + "initial": "InsulatedRoof", + "final": "HighlyInsulatedRoof", + "order": ["LinearQuantity"], + "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 142.73, + "coefficient": 127, "indicator_cols": [ "feature.geometry.computed.roof_surface_area", - "feature.geometry.roof_is_attic.num", + "feature.geometry.roof_is_flat.num", "feature.geometry.est_fp_ratio" ], "error_scale": 0.05, "units": "$/m2", "per": "roof surface area", - "description": "", + "description": "D 4in xps or similar, air barrier, roof membrane, cover board, blown inside rafters", "source": "" } ] }, - { - "trigger_column": "Walls", - "initial": "UninsulatedWalls", - "final": "FullInsulationWallsCavity", + "trigger_column": "RoofInsulation", + "initial": "UninsulatedRoof", + "final": "HighlyInsulatedRoof", "order": ["LinearQuantity"], "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 7.21, + "coefficient": 127, "indicator_cols": [ - "feature.geometry.computed.whole_bldg_facade_area", - "feature.geometry.est_uniform_linear_scaling_factor" + "feature.geometry.computed.roof_surface_area", + "feature.geometry.roof_is_flat.num", + "feature.geometry.est_fp_ratio" ], "error_scale": 0.05, "units": "$/m2", - "per": "facade area", - "description": "", - "source": "" + "per": "roof surface area", + "description": "D 4in xps or similar, air barrier, roof membrane, cover board, blown inside rafters", + "source": "RSMeans" }, { "type": "LinearQuantity", - "coefficient": 30.76, + "coefficient": 142.73, "indicator_cols": [ - "feature.geometry.computed.total_linear_facade_distance", - "feature.geometry.est_uniform_linear_scaling_factor" + "feature.geometry.computed.roof_surface_area", + "feature.geometry.roof_is_attic.num", + 
"feature.geometry.est_fp_ratio" ], "error_scale": 0.05, - "units": "$/m", - "per": "total linear facade distance", - "description": "", + "units": "$/m2", + "per": "roof surface area", + "description": "D Closed cell spray foam and drywall", "source": "" } ] }, - { "trigger_column": "Walls", "initial": "UninsulatedWalls", - "final": "SIP", + "final": "FullInsulationWallsCavity", "order": ["LinearQuantity"], "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 9.21, + "coefficient": 25, "indicator_cols": [ "feature.geometry.computed.whole_bldg_facade_area", "feature.geometry.est_uniform_linear_scaling_factor" @@ -268,21 +296,8 @@ "error_scale": 0.05, "units": "$/m2", "per": "facade area", - "description": "", - "source": "" - }, - { - "type": "LinearQuantity", - "coefficient": 30.76, - "indicator_cols": [ - "feature.geometry.computed.total_linear_facade_distance", - "feature.geometry.est_uniform_linear_scaling_factor" - ], - "error_scale": 0.05, - "units": "$/m", - "per": "total linear facade distance", - "description": "", - "source": "" + "description": "D", + "source": "EVS dataset cross verified RSMeans and LBNL Study" } ] }, @@ -294,7 +309,7 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 188.9, + "coefficient": 197, "indicator_cols": [ "feature.geometry.computed.whole_bldg_facade_area", "feature.geometry.est_uniform_linear_scaling_factor" @@ -302,21 +317,8 @@ "error_scale": 0.05, "units": "$/m2", "per": "facade area", - "description": "", - "source": "" - }, - { - "type": "LinearQuantity", - "coefficient": 30.76, - "indicator_cols": [ - "feature.geometry.computed.total_linear_facade_distance", - "feature.geometry.est_uniform_linear_scaling_factor" - ], - "error_scale": 0.05, - "units": "$/m", - "per": "total linear facade distance", - "description": "", - "source": "" + "description": "D inside cavity plus exterior 25 plus 172", + "source": "EVS dataset cross verified RSMeans and LBNL Study" } ] }, @@ -328,29 +330,16 @@ 
"quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 14.42, + "coefficient": 45, "indicator_cols": [ "feature.geometry.computed.whole_bldg_facade_area", "feature.geometry.est_uniform_linear_scaling_factor" ], - "error_scale": 0.05, + "error_scale": 0.25, "units": "$/m2", "per": "facade area", - "description": "", - "source": "" - }, - { - "type": "LinearQuantity", - "coefficient": 61.52, - "indicator_cols": [ - "feature.geometry.computed.total_linear_facade_distance", - "feature.geometry.est_uniform_linear_scaling_factor" - ], - "error_scale": 0.05, - "units": "$/m", - "per": "total linear facade distance", - "description": "", - "source": "" + "description": "D inside cavity plus remove 25 plus 20, uncertainty", + "source": "EVS dataset cross verified RSMeans and LBNL Study" } ] }, @@ -362,29 +351,16 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 196.11, + "coefficient": 217, "indicator_cols": [ "feature.geometry.computed.whole_bldg_facade_area", "feature.geometry.est_uniform_linear_scaling_factor" ], - "error_scale": 0.05, + "error_scale": 0.25, "units": "$/m2", "per": "facade area", - "description": "", - "source": "" - }, - { - "type": "LinearQuantity", - "coefficient": 61.52, - "indicator_cols": [ - "feature.geometry.computed.total_linear_facade_distance", - "feature.geometry.est_uniform_linear_scaling_factor" - ], - "error_scale": 0.05, - "units": "$/m", - "per": "total linear facade distance", - "description": "", - "source": "" + "description": "D inside cavity plus exterior plus remove 25 plus 172 plus 20, uncertainty", + "source": "EVS dataset cross verified RSMeans and LBNL Study" } ] }, @@ -396,7 +372,7 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 181.69, + "coefficient": 172, "indicator_cols": [ "feature.geometry.computed.whole_bldg_facade_area", "feature.geometry.est_uniform_linear_scaling_factor" @@ -404,50 +380,50 @@ "error_scale": 0.05, "units": "$/m2", "per": "facade area", - 
"description": "", - "source": "" + "description": "D", + "source": "LBNL study" } ] }, { - "trigger_column": "AtticFloorInsulation", - "initial": "NoInsulation", - "final": "HighlyInsulated", + "trigger_column": "Walls", + "initial": "FullInsulationWalls", + "final": "SIP", "order": ["LinearQuantity"], "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 16.58, + "coefficient": 215, "indicator_cols": [ - "feature.geometry.computed.footprint_area", - "feature.geometry.est_fp_ratio" + "feature.geometry.computed.whole_bldg_facade_area", + "feature.geometry.est_uniform_linear_scaling_factor" ], "error_scale": 0.05, "units": "$/m2", - "per": "footprint area", - "description": "", - "source": "" + "per": "facade area", + "description": "D", + "source": "Assumed 10 dollars per sqft SIP itself" } ] }, { - "trigger_column": "AtticFloorInsulation", - "initial": "Insulated", - "final": "HighlyInsulated", + "trigger_column": "Walls", + "initial": "SomeInsulationWalls", + "final": "SIP", "order": ["LinearQuantity"], "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 9.58, + "coefficient": 215, "indicator_cols": [ - "feature.geometry.computed.footprint_area", - "feature.geometry.est_fp_ratio" + "feature.geometry.computed.whole_bldg_facade_area", + "feature.geometry.est_uniform_linear_scaling_factor" ], "error_scale": 0.05, "units": "$/m2", - "per": "footprint area", - "description": "", - "source": "" + "per": "facade area", + "description": "D", + "source": "Assumed 10 dollars per sqft SIP itself" } ] }, @@ -459,23 +435,23 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 16.58, + "coefficient": 21.52, "indicator_cols": [ "feature.geometry.computed.footprint_area", "feature.geometry.est_fp_ratio" ], - "error_scale": 0.05, + "error_scale": 0.1, "units": "$/m2", "per": "footprint area", - "description": "", - "source": "" + "description": "D Total cost needed to retrofit an attic floor", + "source": "EVS dataset cross verified w 
LBNL" } ] }, { "trigger_column": "AtticFloorInsulation", - "initial": "NoInsulation", - "final": "Insulated", + "initial": "Insulated", + "final": "HighlyInsulated", "order": ["LinearQuantity"], "quantity_factors": [ { @@ -488,7 +464,7 @@ "error_scale": 0.05, "units": "$/m2", "per": "footprint area", - "description": "", + "description": "D", "source": "" } ] @@ -509,8 +485,8 @@ "error_scale": 0.05, "units": "$/m2", "per": "window area", - "description": "", - "source": "" + "description": "D", + "source": "RSMeans, Homewyse" } ] }, @@ -530,8 +506,8 @@ "error_scale": 0.05, "units": "$/m2", "per": "window area", - "description": "", - "source": "" + "description": "D", + "source": "RSMeans, Homewyse" } ] }, @@ -543,10 +519,10 @@ "quantity_factors": [ { "type": "FixedQuantity", - "amount": 500.0, + "amount": 200.0, "error_scale": 0.05, - "description": "Fixed cost for programmable thermostat", - "source": "" + "description": "D NO C wire Fixed cost for programmable thermostat plus labor", + "source": "RSMeans, general market products" } ] }, @@ -560,8 +536,8 @@ "type": "FixedQuantity", "amount": 4250.0, "error_scale": 0.05, - "description": "Fixed cost for high efficiency equipment", - "source": "" + "description": "D Fixed cost for high efficiency equipment", + "source": "Market products" } ] }, @@ -573,10 +549,10 @@ "quantity_factors": [ { "type": "FixedQuantity", - "amount": 3000.0, + "amount": 2500.0, "error_scale": 0.05, - "description": "Fixed cost for high efficiency equipment", - "source": "" + "description": "D 50 Gallon unit plus labor", + "source": "RSMeans " } ] }, @@ -588,9 +564,9 @@ "quantity_factors": [ { "type": "FixedQuantity", - "amount": 3200.0, + "amount": 2700.0, "error_scale": 0.05, - "description": "", + "description": "D 50 Gallon unit plus labor", "source": "" } ] @@ -603,9 +579,9 @@ "quantity_factors": [ { "type": "FixedQuantity", - "amount": 3200.0, + "amount": 2700.0, "error_scale": 0.05, - "description": "", + "description": "D 50 Gallon 
unit plus labor", "source": "" } ] @@ -637,7 +613,7 @@ }, { "type": "LinearQuantity", - "coefficient": 303, + "coefficient": 89, "indicator_cols": [ "feature.calculated.heating_capacity_kW" ], @@ -832,231 +808,27 @@ { "trigger_column": "Cooling", "initial": null, - "final": "WindowASHP", - "order": ["FixedQuantity", "LinearQuantity"], + "final": "ASHPCooling", + "order": ["FixedQuantity"], "quantity_factors": [ { "type": "FixedQuantity", - "amount": -79, + "amount": 334, "error_scale": 0.05, "description": "Base intercept for ASHP cost based on capacity and building characteristics", "source": "Heat pump cost model" - }, - { - "type": "LinearQuantity", - "coefficient": 2.996, - "indicator_cols": [ - "feature.geometry.energy_model_conditioned_area" - ], - "error_scale": 0.05, - "units": "$/kW", - "per": "heating system", - "description": "Area-based component for ASHP cost based on capacity and building characteristics", - "source": "Heat pump cost model" - }, - { - "type": "LinearQuantity", - "coefficient": 303, - "indicator_cols": [ - "feature.calculated.heating_capacity_kW" - ], - "error_scale": 0.05, - "units": "$/kW", - "per": "heating system", - "description": "Heating capacity component for ASHP cost based on capacity and building characteristics", - "source": "Heat pump cost model" - }, - { - "type": "LinearQuantity", - "coefficient": 948, - "indicator_cols": ["feature.location.in_county.Berkshire"], - "error_scale": 0.05, - "description": "County factor for Berkshire", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 924, - "indicator_cols": ["feature.location.in_county.Bristol"], - "error_scale": 0.05, - "description": "County factor for Bristol", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 4777, - "indicator_cols": ["feature.location.in_county.Dukes"], - "error_scale": 0.05, - 
"description": "County factor for Dukes", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 2864, - "indicator_cols": ["feature.location.in_county.Essex"], - "error_scale": 0.05, - "description": "County factor for Essex", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 456, - "indicator_cols": ["feature.location.in_county.Franklin"], - "error_scale": 0.05, - "description": "County factor for Franklin", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 613, - "indicator_cols": ["feature.location.in_county.Hampden"], - "error_scale": 0.05, - "description": "County factor for Hampden", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 500, - "indicator_cols": ["feature.location.in_county.Hampshire"], - "error_scale": 0.05, - "description": "County factor for Hampshire", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 2497, - "indicator_cols": ["feature.location.in_county.Middlesex"], - "error_scale": 0.05, - "description": "County factor for Middlesex", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 5840, - "indicator_cols": ["feature.location.in_county.Nantucket"], - "error_scale": 0.05, - "description": "County factor for Nantucket", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 2624, - "indicator_cols": ["feature.location.in_county.Norfolk"], - "error_scale": 0.05, - "description": "County factor for Norfolk", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - 
"type": "LinearQuantity", - "coefficient": 791, - "indicator_cols": ["feature.location.in_county.Plymouth"], - "error_scale": 0.05, - "description": "County factor for Plymouth", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 3591, - "indicator_cols": ["feature.location.in_county.Suffolk"], - "error_scale": 0.05, - "description": "County factor for Suffolk", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 752, - "indicator_cols": ["feature.location.in_county.Worcester"], - "error_scale": 0.05, - "description": "County factor for Worcester", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 0, - "indicator_cols": ["feature.location.in_county.Barnstable"], - "error_scale": 0.05, - "description": "County factor for Barnstable", - "source": "Heat pump cost model", - "units": "$", - "per": "county factor" - }, - { - "type": "LinearQuantity", - "coefficient": 438, - "indicator_cols": ["feature.system.has_gas.true"], - "error_scale": 0.05, - "description": "Gas availability factor", - "source": "Heat pump cost model", - "units": "$", - "per": "gas availability factor" - }, - { - "type": "LinearQuantity", - "coefficient": 0, - "indicator_cols": ["feature.system.has_gas.false"], - "error_scale": 0, - "description": "Gas availability factor", - "source": "Heat pump cost model", - "units": "$", - "per": "gas availability factor" - }, - { - "type": "LinearQuantity", - "coefficient": 334, - "indicator_cols": ["feature.system.has_cooling.true"], - "error_scale": 0.05, - "description": "Cooling availability factor", - "source": "Heat pump cost model", - "units": "$", - "per": "cooling availability factor" - }, - { - "type": "LinearQuantity", - "coefficient": 0, - "indicator_cols": ["feature.system.has_cooling.false"], - "error_scale": 
0, - "description": "Cooling availability factor", - "source": "Heat pump cost model", - "units": "$", - "per": "cooling availability factor" } ] }, { "trigger_column": "Cooling", "initial": null, - "final": "ASHPCooling", + "final": "WindowASHP", "order": ["FixedQuantity"], "quantity_factors": [ { "type": "FixedQuantity", - "amount": 334, + "amount": 2000, "error_scale": 0.05, "description": "Base intercept for ASHP cost based on capacity and building characteristics", "source": "Heat pump cost model" @@ -1071,7 +843,7 @@ "quantity_factors": [ { "type": "FixedQuantity", - "amount": 1334, + "amount": 334, "error_scale": 0.05, "description": "Base intercept for GSHP cost based on capacity and building characteristics", "source": "Heat pump cost model" @@ -1087,29 +859,29 @@ { "type": "FixedQuantity", "amount": -79, - "error_scale": 0.05, + "error_scale": 0.1, "description": "Base intercept for ASHP cost based on capacity and building characteristics", "source": "Heat pump cost model" }, { "type": "LinearQuantity", - "coefficient": 3.496, + "coefficient": 2.996, "indicator_cols": [ "feature.geometry.energy_model_conditioned_area" ], - "error_scale": 0.05, - "units": "$/kW", + "error_scale": 0.1, + "units": "$/sqm", "per": "heating system", "description": "Area-based component for ASHP cost based on capacity and building characteristics", "source": "Heat pump cost model" }, { "type": "LinearQuantity", - "coefficient": 350, + "coefficient": 378, "indicator_cols": [ "feature.calculated.heating_capacity_kW" ], - "error_scale": 0.05, + "error_scale": 0.1, "units": "$/kW", "per": "heating system", "description": "Heating capacity component for ASHP cost based on capacity and building characteristics", @@ -1119,7 +891,7 @@ "type": "LinearQuantity", "coefficient": 948, "indicator_cols": ["feature.location.in_county.Berkshire"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Berkshire", "source": "Heat pump cost model", "units": "$", @@ -1129,7 
+901,7 @@ "type": "LinearQuantity", "coefficient": 924, "indicator_cols": ["feature.location.in_county.Bristol"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Bristol", "source": "Heat pump cost model", "units": "$", @@ -1139,7 +911,7 @@ "type": "LinearQuantity", "coefficient": 4777, "indicator_cols": ["feature.location.in_county.Dukes"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Dukes", "source": "Heat pump cost model", "units": "$", @@ -1149,7 +921,7 @@ "type": "LinearQuantity", "coefficient": 2864, "indicator_cols": ["feature.location.in_county.Essex"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Essex", "source": "Heat pump cost model", "units": "$", @@ -1159,7 +931,7 @@ "type": "LinearQuantity", "coefficient": 456, "indicator_cols": ["feature.location.in_county.Franklin"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Franklin", "source": "Heat pump cost model", "units": "$", @@ -1169,7 +941,7 @@ "type": "LinearQuantity", "coefficient": 613, "indicator_cols": ["feature.location.in_county.Hampden"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Hampden", "source": "Heat pump cost model", "units": "$", @@ -1179,7 +951,7 @@ "type": "LinearQuantity", "coefficient": 500, "indicator_cols": ["feature.location.in_county.Hampshire"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Hampshire", "source": "Heat pump cost model", "units": "$", @@ -1189,7 +961,7 @@ "type": "LinearQuantity", "coefficient": 2497, "indicator_cols": ["feature.location.in_county.Middlesex"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Middlesex", "source": "Heat pump cost model", "units": "$", @@ -1199,7 +971,7 @@ "type": "LinearQuantity", "coefficient": 5840, "indicator_cols": ["feature.location.in_county.Nantucket"], - "error_scale": 0.05, + "error_scale": 
0.1, "description": "County factor for Nantucket", "source": "Heat pump cost model", "units": "$", @@ -1209,7 +981,7 @@ "type": "LinearQuantity", "coefficient": 2624, "indicator_cols": ["feature.location.in_county.Norfolk"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Norfolk", "source": "Heat pump cost model", "units": "$", @@ -1219,7 +991,7 @@ "type": "LinearQuantity", "coefficient": 791, "indicator_cols": ["feature.location.in_county.Plymouth"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Plymouth", "source": "Heat pump cost model", "units": "$", @@ -1229,7 +1001,7 @@ "type": "LinearQuantity", "coefficient": 3591, "indicator_cols": ["feature.location.in_county.Suffolk"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Suffolk", "source": "Heat pump cost model", "units": "$", @@ -1239,7 +1011,7 @@ "type": "LinearQuantity", "coefficient": 752, "indicator_cols": ["feature.location.in_county.Worcester"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Worcester", "source": "Heat pump cost model", "units": "$", @@ -1249,7 +1021,7 @@ "type": "LinearQuantity", "coefficient": 0, "indicator_cols": ["feature.location.in_county.Barnstable"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "County factor for Barnstable", "source": "Heat pump cost model", "units": "$", @@ -1259,7 +1031,7 @@ "type": "LinearQuantity", "coefficient": 438, "indicator_cols": ["feature.system.has_gas.true"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "Gas availability factor", "source": "Heat pump cost model", "units": "$", @@ -1279,7 +1051,7 @@ "type": "LinearQuantity", "coefficient": 334, "indicator_cols": ["feature.system.has_cooling.true"], - "error_scale": 0.05, + "error_scale": 0.1, "description": "Cooling availability factor", "source": "Heat pump cost model", "units": "$", @@ -1306,7 +1078,7 @@ "quantity_factors": [ { "type": 
"LinearQuantity", - "coefficient": 6.46, + "coefficient": 5.5, "indicator_cols": [ "feature.geometry.computed.footprint_area", "feature.geometry.est_fp_ratio" @@ -1314,8 +1086,8 @@ "error_scale": 0.05, "units": "$/m2", "per": "footprint area", - "description": "", - "source": "" + "description": "D", + "source": "EVS Dataset for Pipe Tenting, Angi, HomeAdvisor. About 0.2 ft of exposed pipe per footprint" } ] }, @@ -1327,7 +1099,7 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 32.29, + "coefficient": 16.14, "indicator_cols": [ "feature.geometry.computed.footprint_area", "feature.geometry.est_fp_ratio" @@ -1335,8 +1107,8 @@ "error_scale": 0.05, "units": "$/m2", "per": "footprint area", - "description": "", - "source": "" + "description": "D", + "source": "RSMeans new insulated ductwork. About .1 ft of exposed ductwork per footprint" } ] }, @@ -1348,7 +1120,7 @@ "quantity_factors": [ { "type": "LinearQuantity", - "coefficient": 11.48, + "coefficient": 8.6, "indicator_cols": [ "feature.geometry.computed.perimeter", "feature.extra_spaces.basement.exists.num", @@ -1363,7 +1135,7 @@ }, { "type": "LinearQuantity", - "coefficient": 32.2917, + "coefficient": 18.7, "indicator_cols": [ "feature.geometry.computed.footprint_area", "feature.geometry.est_fp_ratio", @@ -1373,8 +1145,8 @@ "error_scale": 0.05, "units": "$/m2", "per": "footprint area", - "description": "", - "source": "" + "description": "D Basement sills and ceiling 6inch", + "source": "RSMeans and EVS Data" } ] }, @@ -1396,14 +1168,13 @@ "error_scale": 0.05, "units": "$/m", "per": "perimeter", - "description": "", - "source": "" + "description": "Apply 2in XPS to basement wall and knee wall, apply dimple mat ($1), build 4in stud wall ($4) w R11 batt insulation, no drywall. 
8ft basement", + "source": "Homewyse, various online market" } ] } ], "output_key": "cost", "raise_on_duplicate_trigger": true, - "create_metadata": false, - "metadata_aggregation": null + "create_metadata": false } diff --git a/epengine/models/inference.py b/epengine/models/inference.py index 5148cba..8d5304a 100644 --- a/epengine/models/inference.py +++ b/epengine/models/inference.py @@ -1287,6 +1287,70 @@ def make_priors(self): prior_dict["feature.fuels.emissions.NaturalGas"] = gas_emissions_prior prior_dict["feature.fuels.emissions.Oil"] = oil_emissions_prior + # --- Solar-related priors --- + # Ensure solar priors are part of the main Priors set, so changes to + # feature.semantic.OnsiteSolar appear in the dependency graph and are + # picked up by select_prior_tree_for_changed_features. + + # Annual yield kWh/kW-year + solar_yield_prior = UnconditionalPrior( + sampler=ClippedNormalSampler( + mean=1100, + std=150, + clip_min=800, + clip_max=1400, + ) + ) + prior_dict["feature.solar.yield_kWh_per_kW_year"] = solar_yield_prior + + # Panel power density W/m2 + panel_power_density_prior = UnconditionalPrior( + sampler=ClippedNormalSampler( + mean=180, + std=50, + clip_min=120, + clip_max=300, + ) + ) + prior_dict["feature.solar.panel_power_density_w_per_m2"] = ( + panel_power_density_prior + ) + + # Upgraded coverage depends on semantic OnsiteSolar choice + solar_coverage_prior = ConditionalPrior( + source_feature="feature.semantic.OnsiteSolar", + fallback_prior=None, + conditions=[ + ConditionalPriorCondition( + match_val="LowSolarPV", sampler=FixedValueSampler(value=0.25) + ), + ConditionalPriorCondition( + match_val="MedSolarPV", sampler=FixedValueSampler(value=0.50) + ), + ConditionalPriorCondition( + match_val="MaxSolarPV", sampler=FixedValueSampler(value=1.0) + ), + ConditionalPriorCondition( + match_val="NoSolarPV", sampler=FixedValueSampler(value=0.0) + ), + ConditionalPriorCondition( + match_val="ExistingSolarPV", sampler=FixedValueSampler(value=0.0) + ), 
+ ], + ) + prior_dict["feature.solar.upgraded_coverage"] = solar_coverage_prior + + # Max roof utilization for PV placement + max_roof_utilization_prior = UnconditionalPrior( + sampler=ClippedNormalSampler( + mean=0.75, + std=0.05, + clip_min=0.6, + clip_max=0.9, + ) + ) + prior_dict["feature.solar.max_roof_utilization"] = max_roof_utilization_prior + # TODO: optionally create the matrix for moving raw values to # various energy end uses, fuels, emissions, costs. @@ -1314,44 +1378,74 @@ def generator(self) -> np.random.Generator: """The random number generator for the experiment.""" return np.random.default_rng(42) + # TODO: remove this function? def add_solar_features(self, features: pd.DataFrame) -> pd.DataFrame: """Add solar-related features to the features DataFrame.""" - # Add solar yield as a base feature (Massachusetts average) - yield_sampler = ClippedNormalSampler( - mean=1100, - std=150, - clip_min=800, - clip_max=1400, - ) - features["feature.solar.yield_kWh_per_kW_year"] = yield_sampler.sample( - features, len(features), self.generator - ) - panel_power_density_sampler = ClippedNormalSampler( - mean=180, - std=50, - clip_min=120, - clip_max=300, - ) - features["feature.solar.panel_power_density_w_per_m2"] = ( - panel_power_density_sampler.sample(features, len(features), self.generator) - ) + # Do not overwrite if priors already sampled these columns + if "feature.solar.yield_kWh_per_kW_year" not in features.columns: + yield_sampler = ClippedNormalSampler( + mean=1100, + std=150, + clip_min=800, + clip_max=1400, + ) + features["feature.solar.yield_kWh_per_kW_year"] = yield_sampler.sample( + features, len(features), self.generator + ) + + if "feature.solar.panel_power_density_w_per_m2" not in features.columns: + panel_power_density_sampler = ClippedNormalSampler( + mean=180, + std=50, + clip_min=120, + clip_max=300, + ) + features["feature.solar.panel_power_density_w_per_m2"] = ( + panel_power_density_sampler.sample( + features, len(features), 
self.generator + ) + ) + # Set default value for OnsiteSolar if not provided if "feature.semantic.OnsiteSolar" not in features.columns: features["feature.semantic.OnsiteSolar"] = "NoSolarPV" - # Calculate upgraded solar coverage based on semantic field - features["feature.solar.upgraded_coverage"] = np.where( - features["feature.semantic.OnsiteSolar"] == "LowSolarPV", - 0.25, - np.where( - features["feature.semantic.OnsiteSolar"] == "MedSolarPV", - 0.50, - np.where( - features["feature.semantic.OnsiteSolar"] == "MaxSolarPV", - 1.0, - 0.0, - ), - ), - ) + + # Create a consolidated upgraded coverage column for downstream cost logic, if missing + if "feature.solar.upgraded_coverage" not in features.columns: + coverage_prior = ConditionalPrior( + source_feature="feature.semantic.OnsiteSolar", + fallback_prior=None, + conditions=[ + ConditionalPriorCondition( + match_val="LowSolarPV", sampler=FixedValueSampler(value=0.25) + ), + ConditionalPriorCondition( + match_val="MedSolarPV", sampler=FixedValueSampler(value=0.50) + ), + ConditionalPriorCondition( + match_val="MaxSolarPV", sampler=FixedValueSampler(value=1.0) + ), + ConditionalPriorCondition( + match_val="NoSolarPV", sampler=FixedValueSampler(value=0.0) + ), + ], + ) + features["feature.solar.upgraded_coverage"] = coverage_prior.sample( + features, len(features), self.generator + ) + + if "feature.solar.max_roof_utilization" not in features.columns: + max_roof_utilization_sampler = ClippedNormalSampler( + mean=0.75, + std=0.05, + clip_min=0.6, + clip_max=0.9, + ) + features["feature.solar.max_roof_utilization"] = ( + max_roof_utilization_sampler.sample( + features, len(features), self.generator + ) + ) features["feature.upgrade.solar_pv_kW"] = 0.0 @@ -1362,23 +1456,9 @@ def update_max_solar_coverage( ) -> pd.DataFrame: """Update the MaxSolarPV coverage when electricity consumption data is available.""" features = features.copy() - - # Set coverage values based on solar type - 
features["feature.solar.upgraded_coverage"] = np.where( - features["feature.semantic.OnsiteSolar"] == "MaxSolarPV", - features["feature.solar.upgraded_coverage"], - np.where( - features["feature.semantic.OnsiteSolar"] == "LowSolarPV", - 0.25, - np.where( - features["feature.semantic.OnsiteSolar"] == "MedSolarPV", - 0.50, - 0.0, - ), - ), - ) - + # Base coverage values for non-Max choices # Handle MaxSolarPV samples - calculate feasible coverage for each + base_coverage = features["feature.solar.upgraded_coverage"] max_solar_mask = features["feature.semantic.OnsiteSolar"] == "MaxSolarPV" if max_solar_mask.any(): max_feasible = self.calculate_feasible_solar_coverage( @@ -1386,9 +1466,9 @@ def update_max_solar_coverage( electricity_consumption.loc[max_solar_mask], ) # Use the maximum feasible coverage for each sample - features.loc[max_solar_mask, "feature.solar.upgraded_coverage"] = ( - max_feasible - ) + base_coverage[max_solar_mask] = max_feasible + + features["feature.solar.upgraded_coverage"] = base_coverage return features @@ -1412,10 +1492,7 @@ def make_features(self, n: int) -> tuple[pd.DataFrame, pd.DataFrame]: ) df = priors.sample(df, n, self.generator) - # Add solar features - df = self.add_solar_features(df) - - # Defer solar upgrade capacity calculation to the post-prediction phase + # Solar features are now included in priors; avoid re-sampling here original_cooling = None mask = None @@ -1451,8 +1528,6 @@ def make_retrofit_cost_features( safety_factor = 1.2 raw_capacity_kW = peak_heating_per_m2 * safety_factor - - # Map calculated capacity to nearest available equipment size (unless above max) available_sizes_kW = np.array([ 5.3, 7.0, @@ -1535,13 +1610,11 @@ def oh_col_name_for_county(county: str) -> str: "feature.system.has_cooling.true" ] - # Add solar system size for retrofit cost calculations if features["feature.semantic.OnsiteSolar"].iloc[0] in [ "LowSolarPV", "MedSolarPV", "MaxSolarPV", ]: - # Calculate the required solar system size for the 
upgrade electricity_consumption = elect_eui * self.actual_conditioned_area_m2 # Update MaxSolarPV coverage if needed @@ -1556,7 +1629,6 @@ def oh_col_name_for_county(county: str) -> str: cost_features["feature.upgrade.solar_pv_kW"] = required_system_size else: - # No solar upgrade, set to 0 cost_features["feature.upgrade.solar_pv_kW"] = 0.0 return cost_features @@ -1795,7 +1867,8 @@ def compute_costs( ) end_use_costs = cast( pd.DataFrame, - base_end_use_costs.groupby(level="EndUse", axis=1).sum(), + # Avoid deprecated axis=1: use transpose-then-groupby pattern + base_end_use_costs.T.groupby(level="EndUse").sum().T, ) solar_cost_total = net_elec_costs.sum(axis=1) - elec_costs.sum(axis=1) end_use_costs["Solar"] = solar_cost_total @@ -1847,7 +1920,8 @@ def compute_emissions( ) end_use_emissions = cast( pd.DataFrame, - allowed_end_use_emissions.groupby(level="EndUse", axis=1).sum(), + # Avoid deprecated axis=1: use transpose-then-groupby pattern + allowed_end_use_emissions.T.groupby(level="EndUse").sum().T, ) solar_emissions_total = net_elec_emissions.sum(axis=1) - elec_emissions.sum( axis=1 @@ -1856,7 +1930,8 @@ def compute_emissions( fuel_emissions = cast( pd.DataFrame, - disaggregated_emissions.groupby(level="Fuel", axis=1).sum(), + # Avoid deprecated axis=1: use transpose-then-groupby pattern + disaggregated_emissions.T.groupby(level="Fuel").sum().T, ) return fuel_emissions, end_use_emissions @@ -2004,13 +2079,12 @@ def calculate_feasible_solar_coverage( roof_area_m2 = features["feature.geometry.computed.roof_surface_area"] # Solar panel assumptions - # panel_efficiency = 0.22 panel_power_density = features["feature.solar.panel_power_density_w_per_m2"] - max_roof_utilization = 0.50 # Only 50% of roof can be covered, assuming we have a fire safety boundary. 
This is a very high level estimate - # TODO: Account for roof angle, orientation, and shading - # Calculate maximum solar capacity possible - max_solar_area_m2 = roof_area_m2 * max_roof_utilization + # TODO: Account for roof angle, orientation, and shading + max_solar_area_m2 = ( + roof_area_m2 * features["feature.solar.max_roof_utilization"] + ) max_solar_capacity_kW = (max_solar_area_m2 * panel_power_density) / 1000 max_local_solar_capacity_kW = 25 mask = max_solar_capacity_kW > max_local_solar_capacity_kW @@ -2074,7 +2148,6 @@ def apply_solar_to_electricity_consumption( """Apply solar generation to electricity consumption to get net consumption.""" net_consumption = electricity_consumption.copy() - # Get the OnsiteSolar semantic field value - vectorized approach if "feature.semantic.OnsiteSolar" not in features.columns: # No solar column, return original consumption return net_consumption @@ -2200,12 +2273,14 @@ def run( # noqa: C901 changed_feature_fields, changed_context_fields = self.changed_context_fields changed_feature_names = set(changed_feature_fields.keys()) + print("CHANGED FEATURE NAMES", changed_feature_names) # then we will get the priors that must be re-run as they are downstream # of the changed features. changed_priors = original_priors.select_prior_tree_for_changed_features( changed_feature_names ) + print(changed_priors.sampled_features.keys()) # then we will take the original features and update the changed semantic # features. 
@@ -2217,14 +2292,18 @@ def run( # noqa: C901 new_features = changed_priors.sample( new_features, len(new_features), self.original.generator ) + print(new_features.columns) new_transformed_features = self.original.source_feature_transform.transform( new_features ) # Get peak results for cost calculations new_results_raw = self.original.predict(new_transformed_features) - new_results = self.original.compute_distributions(new_features, new_results_raw) + new_results_energy = cast(pd.DataFrame, new_results_raw["Energy"]) + + new_results = self.original.compute_distributions( + new_features, new_results_energy + ) new_results_peak = cast(pd.DataFrame, new_results_raw["Peak"]) - # new_results_energy = cast(pd.DataFrame, new_results_raw["Energy"]) # finally, we compute the deltas and the corresponding summary # statistics. @@ -2256,23 +2335,16 @@ def run( # noqa: C901 # Compute features for cost calculations after inference # For solar upgrades, we need to use the ACTUAL electricity consumption (before solar) - # to calculate the system size needed, not the net consumption if there is alrearyd some solar - upgraded_spec = SBEMInferenceRequestSpec( - **{ - k: v - for k, v in self.original.model_dump().items() - if k != "semantic_field_context" - }, - semantic_field_context=self.upgraded_semantic_field_context, - ) + # to calculate the system size needed, not the net consumption if there is already some solar - electricity_eui = upgraded_spec._actual_electricity_consumption.sum(axis=1) + # TODO: update the sampling to occur immediately before the inference runs - # Calculate the feature distributions for solar features (yield, coverage) - new_features_with_solar = upgraded_spec.add_solar_features(new_features) + electricity_eui = self.original._actual_electricity_consumption.sum(axis=1) - features_for_costs = upgraded_spec.make_retrofit_cost_features( - new_features_with_solar, new_results_peak, electricity_eui + # Calculate the feature distributions for solar features 
(yield, coverage) + # Solar features are included in priors; use new_features directly + features_for_costs = self.original.make_retrofit_cost_features( + new_features, new_results_peak, electricity_eui ) retrofit_costs = self.compute_retrofit_costs(features_for_costs, cost_config) From ba264a5a5c06ed543a8d41a107bbcca50290129c Mon Sep 17 00:00:00 2001 From: DARYA GUETTLER Date: Thu, 16 Oct 2025 09:14:15 -0400 Subject: [PATCH 6/6] update max solar cost --- epengine/models/data/retrofit-costs.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epengine/models/data/retrofit-costs.json b/epengine/models/data/retrofit-costs.json index 3cb8f3c..d8d599c 100644 --- a/epengine/models/data/retrofit-costs.json +++ b/epengine/models/data/retrofit-costs.json @@ -39,7 +39,7 @@ { "trigger_column": "OnsiteSolar", "initial": "NoSolarPV", - "final": "HighSolarPV", + "final": "MaxSolarPV", "order": ["LinearQuantity"], "quantity_factors": [ {