From 808fc10b91ff3fa32320c23bfc5eb14cf38e5462 Mon Sep 17 00:00:00 2001
From: DARYA GUETTLER <daryag@mit.edu>
Date: Fri, 3 Oct 2025 11:38:18 -0400
Subject: [PATCH 1/6] update to add solar as a negative end use

---
 epengine/models/inference.py | 111 ++++++++++++++++++++++++-----------
 1 file changed, 76 insertions(+), 35 deletions(-)

diff --git a/epengine/models/inference.py b/epengine/models/inference.py
index 4e29f2e..bb2d443 100644
--- a/epengine/models/inference.py
+++ b/epengine/models/inference.py
@@ -61,7 +61,8 @@
 )
 from epengine.models.transforms import CategoricalFeature, RegressorInputSpec
 
-END_USES = ("Lighting", "Equipment", "DomesticHotWater", "Heating", "Cooling")
+END_USES = ("Lighting", "Equipment", "DomesticHotWater", "Heating", "Cooling", "Solar")
+RAW_END_USES = ("Lighting", "Equipment", "DomesticHotWater", "Heating", "Cooling")
 OIL_HEATING_SYSTEMS = ["OilHeating"]
 NG_HEATING_SYSTEMS = ["NaturalGasHeating", "NaturalGasCondensingHeating"]
 INCOME_BRACKETS = [
@@ -85,7 +86,7 @@
 _has_warned_max_solar_capacity_cap = False
 
 DATASET_SEGMENT_MAP = {
-    "Raw": END_USES,
+    "Raw": RAW_END_USES,
     "EndUse": END_USES,
     "Fuel": FUELS,
     "EndUseCost": END_USES,
@@ -189,22 +190,16 @@ def serialized(self) -> BaseModel:  # noqa: C901
         field_datas = {}
         percentile_mapper = {v[1]: v[0] for v in PERCENTILES.values()}
 
-        # Create copies of summary dataframes with percentile mapper applied
         costs_summary_renamed = self.costs_summary.rename(index=percentile_mapper)
-        # paybacks_summary_renamed = self.paybacks_summary.rename(index=percentile_mapper)
 
-        # Process all numeric columns in the costs dataframe
         for col in self.costs.columns:
-            # Skip non-numeric columns (e.g., metadata objects)
             if not pd.api.types.is_numeric_dtype(self.costs[col]):
                 continue
-            # Skip detailed cost columns like cost.Trigger.Final (keep only cost.Trigger)
             if col.startswith("cost.") and col.count(".") > 1:
                 continue
             col_name = col.split(".")[-1]
             field_specs[col_name] = (SummarySpec, Field(title=col))
 
-            # Get summary data for this column
             if col in costs_summary_renamed.columns:
                 field_data = costs_summary_renamed.loc[:, col].to_dict()
             else:
@@ -295,6 +290,16 @@ def create_end_use_disaggregation_spec(SummarySpec: type[SummarySpecBase]):
     )
 
 
+def create_raw_end_use_disaggregation_spec(SummarySpec: type[SummarySpecBase]):
+    """Create a raw end use disaggregation spec WITHOUT Solar as a field."""
+    fields = {}
+    for end_use in RAW_END_USES:
+        fields[end_use] = (SummarySpec, Field(title=end_use))
+    return create_model(
+        "RawEndUseDisaggregationSpec", **fields, __config__=ConfigDict(extra="forbid")
+    )
+
+
 def create_fuel_disaggregation_spec(SummarySpec: type[SummarySpecBase]):
     """Create a fuel disaggregation spec with the fuels as fields."""
     fields = {}
@@ -306,12 +311,16 @@ def create_fuel_disaggregation_spec(SummarySpec: type[SummarySpecBase]):
 
 
 def create_disaggregation_spec(
-    EndUseDisaggregationSpec: type[BaseModel], FuelDisaggregationSpec: type[BaseModel]
+    EndUseDisaggregationSpec: type[BaseModel],
+    RawEndUseDisaggregationSpec: type[BaseModel],
+    FuelDisaggregationSpec: type[BaseModel],
 ):
     """Create a disaggregation spec with the datasets as fields."""
     fields = {}
     for dataset, dataset_segments in DATASET_SEGMENT_MAP.items():
-        if dataset_segments == END_USES:
+        if dataset == "Raw":
+            fields[dataset] = (RawEndUseDisaggregationSpec, Field(title=dataset))
+        elif dataset_segments == END_USES:
             fields[dataset] = (EndUseDisaggregationSpec, Field(title=dataset))
         elif dataset_segments == FUELS:
             fields[dataset] = (FuelDisaggregationSpec, Field(title=dataset))
@@ -377,9 +386,10 @@ def create_sbem_inference_savings_response_spec(
 
 SummarySpec = create_summary_spec()
 EndUseDisaggregationSpec = create_end_use_disaggregation_spec(SummarySpec)
+RawEndUseDisaggregationSpec = create_raw_end_use_disaggregation_spec(SummarySpec)
 FuelDisaggregationSpec = create_fuel_disaggregation_spec(SummarySpec)
 DisaggregationSpec = create_disaggregation_spec(
-    EndUseDisaggregationSpec, FuelDisaggregationSpec
+    EndUseDisaggregationSpec, RawEndUseDisaggregationSpec, FuelDisaggregationSpec
 )
 DisaggregationsSpec = create_disaggregations_spec(DisaggregationSpec)
 TotalSpec = create_total_spec(SummarySpec)
@@ -1701,21 +1711,43 @@ def separate_fuel_based_end_uses(
         )
 
         gas = pd.concat(
-            [heat_gas, cool_gas, dhw_gas, lighting * 0, equipment * 0],
+            [
+                heat_gas,
+                cool_gas,
+                dhw_gas,
+                lighting * 0,
+                equipment * 0,
+            ],
             axis=1,
-            keys=["Heating", "Cooling", "Domestic Hot Water", "Lighting", "Equipment"],
+            keys=[
+                "Heating",
+                "Cooling",
+                "Domestic Hot Water",
+                "Lighting",
+                "Equipment",
+            ],
         )[df_end_uses.columns]
         oil = pd.concat(
-            [heat_oil, cool_oil, dhw_oil, lighting * 0, equipment * 0],
+            [
+                heat_oil,
+                cool_oil,
+                dhw_oil,
+                lighting * 0,
+                equipment * 0,
+            ],
             axis=1,
-            keys=["Heating", "Cooling", "Domestic Hot Water", "Lighting", "Equipment"],
+            keys=[
+                "Heating",
+                "Cooling",
+                "Domestic Hot Water",
+                "Lighting",
+                "Equipment",
+            ],
         )[df_end_uses.columns]
 
         # Store actual electricity consumption for solar calculations
         self._actual_electricity_consumption = actual_electricity_consumption
 
-        # Use net electricity consumption for the main fuel disaggregation
-
         df_disaggregated_fuels = pd.concat(
             [actual_electricity_consumption, net_electricity_consumption, gas, oil],
             axis=1,
@@ -1755,7 +1787,14 @@ def compute_costs(
             keys=["Electricity", "NetElectricity", "NaturalGas", "Oil"],
             names=["Fuel", "EndUse"],
         )
-        end_use_costs = disaggregated_costs.T.groupby(level=["EndUse"]).sum().T
+        # EndUseCost should avoid double-counting raw Electricity; only include NetElectricity + fuels
+        allowed_end_use_costs = pd.concat(
+            [net_elec_costs, gas_costs, oil_costs],
+            axis=1,
+            keys=["NetElectricity", "NaturalGas", "Oil"],
+            names=["Fuel", "EndUse"],
+        )
+        end_use_costs = allowed_end_use_costs.T.groupby(level=["EndUse"]).sum().T
         fuel_costs = disaggregated_costs.T.groupby(level=["Fuel"]).sum().T
 
         return fuel_costs, end_use_costs
@@ -1817,6 +1856,11 @@ def compute_distributions(self, features: pd.DataFrame, results_raw: pd.DataFram
         results_disaggregated_fuels = self.separate_fuel_based_end_uses(
             df_features=features, df_end_uses=results_end_uses
         )
+        solar_end_use_total = results_disaggregated_fuels["NetElectricity"].sum(
+            axis=1
+        ) - results_disaggregated_fuels["Electricity"].sum(axis=1)
+        results_end_uses = results_end_uses.copy()
+        results_end_uses["Solar"] = solar_end_use_total
 
         results_fuels = results_disaggregated_fuels.T.groupby(level=["Fuel"]).sum().T
         results_fuel_costs, results_end_use_costs = self.compute_costs(
@@ -1870,28 +1914,25 @@ def compute_distributions(self, features: pd.DataFrame, results_raw: pd.DataFram
         total_keys: list[str] = []
 
         for dataset in datasets_for_totals:
-            if dataset == "FuelCost":
-                # Sum only NetElectricity + NaturalGas + Oil to avoid double-counting Electricity
-                if "FuelCost" in disaggregated.columns.get_level_values("Dataset"):
-                    fc = disaggregated.loc[:, ("FuelCost", slice(None))]
-                    # keep only the relevant fuels if present
-                    fuels = [
+            # Default: sum all segments for the dataset if present
+            if dataset in disaggregated.columns.get_level_values("Dataset"):
+                cols = disaggregated.xs(dataset, level="Dataset", axis=1)
+                if dataset == "FuelCost":
+                    # Only count NetElectricity + NaturalGas + Oil for FuelCost
+                    allowed = [
                         c
-                        for c in fc.columns.get_level_values("Segment")
+                        for c in cols.columns
                         if c in ("NetElectricity", "NaturalGas", "Oil")
                     ]
-                    if fuels:
-                        s = fc.loc[:, (slice(None), fuels)].sum(axis=1)
-                    else:
-                        s = pd.Series(0.0, index=disaggregated.index)
+                    s = (
+                        cols.loc[:, allowed].sum(axis=1)
+                        if len(allowed) > 0
+                        else pd.Series(0.0, index=disaggregated.index)
+                    )
                 else:
-                    s = pd.Series(0.0, index=disaggregated.index)
+                    s = cols.sum(axis=1)
             else:
-                # Default behavior: sum all segments for the dataset
-                if dataset in disaggregated.columns.get_level_values("Dataset"):
-                    s = disaggregated.xs(dataset, level="Dataset", axis=1).sum(axis=1)
-                else:
-                    s = pd.Series(0.0, index=disaggregated.index)
+                s = pd.Series(0.0, index=disaggregated.index)
 
             total_parts.append(s)
             total_keys.append(dataset)

From b488aaa173cde7797a2bcc196601513677ef8dd5 Mon Sep 17 00:00:00 2001
From: DARYA GUETTLER <daryag@mit.edu>
Date: Fri, 3 Oct 2025 12:17:28 -0400
Subject: [PATCH 2/6] update exclusion method

---
 epengine/models/inference.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/epengine/models/inference.py b/epengine/models/inference.py
index bb2d443..43d634f 100644
--- a/epengine/models/inference.py
+++ b/epengine/models/inference.py
@@ -1794,8 +1794,16 @@ def compute_costs(
             keys=["NetElectricity", "NaturalGas", "Oil"],
             names=["Fuel", "EndUse"],
         )
-        end_use_costs = allowed_end_use_costs.T.groupby(level=["EndUse"]).sum().T
-        fuel_costs = disaggregated_costs.T.groupby(level=["Fuel"]).sum().T
+        end_use_costs = cast(
+            pd.DataFrame,
+            allowed_end_use_costs.groupby(level="EndUse", axis=1).sum(),
+        )
+
+        if "Solar" not in end_use_costs.columns:
+            end_use_costs["Solar"] = 0.0
+        fuel_costs = cast(
+            pd.DataFrame, disaggregated_costs.T.groupby(level=["Fuel"]).sum().T
+        )
 
         return fuel_costs, end_use_costs
 
@@ -1833,8 +1841,15 @@ def compute_emissions(
             keys=["Electricity", "NetElectricity", "NaturalGas", "Oil"],
             names=["Fuel", "EndUse"],
         )
-        end_use_emissions = disaggregated_emissions.T.groupby(level=["EndUse"]).sum().T
-        fuel_emissions = disaggregated_emissions.T.groupby(level=["Fuel"]).sum().T
+        end_use_emissions = cast(
+            pd.DataFrame,
+            disaggregated_emissions.groupby(level="EndUse", axis=1).sum(),
+        )
+        if "Solar" not in end_use_emissions.columns:
+            end_use_emissions["Solar"] = 0.0
+        fuel_emissions = cast(
+            pd.DataFrame, disaggregated_emissions.T.groupby(level=["Fuel"]).sum().T
+        )
 
         return fuel_emissions, end_use_emissions
 

From aafbc1f86ee287fdb748f17f76154ae405044893 Mon Sep 17 00:00:00 2001
From: DARYA GUETTLER <daryag@mit.edu>
Date: Fri, 3 Oct 2025 12:56:44 -0400
Subject: [PATCH 3/6] update emissions calc with exclusion

---
 epengine/models/inference.py | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/epengine/models/inference.py b/epengine/models/inference.py
index 43d634f..348420e 100644
--- a/epengine/models/inference.py
+++ b/epengine/models/inference.py
@@ -1787,20 +1787,18 @@ def compute_costs(
             keys=["Electricity", "NetElectricity", "NaturalGas", "Oil"],
             names=["Fuel", "EndUse"],
         )
-        # EndUseCost should avoid double-counting raw Electricity; only include NetElectricity + fuels
-        allowed_end_use_costs = pd.concat(
-            [net_elec_costs, gas_costs, oil_costs],
+        base_end_use_costs = pd.concat(
+            [elec_costs, gas_costs, oil_costs],
             axis=1,
-            keys=["NetElectricity", "NaturalGas", "Oil"],
+            keys=["Electricity", "NaturalGas", "Oil"],
             names=["Fuel", "EndUse"],
         )
         end_use_costs = cast(
             pd.DataFrame,
-            allowed_end_use_costs.groupby(level="EndUse", axis=1).sum(),
+            base_end_use_costs.groupby(level="EndUse", axis=1).sum(),
         )
-
-        if "Solar" not in end_use_costs.columns:
-            end_use_costs["Solar"] = 0.0
+        solar_cost_total = net_elec_costs.sum(axis=1) - elec_costs.sum(axis=1)
+        end_use_costs["Solar"] = solar_cost_total
         fuel_costs = cast(
             pd.DataFrame, disaggregated_costs.T.groupby(level=["Fuel"]).sum().T
         )
@@ -1841,14 +1839,24 @@ def compute_emissions(
             keys=["Electricity", "NetElectricity", "NaturalGas", "Oil"],
             names=["Fuel", "EndUse"],
         )
+        allowed_end_use_emissions = pd.concat(
+            [elec_emissions, gas_emissions, oil_emissions],
+            axis=1,
+            keys=["Electricity", "NaturalGas", "Oil"],
+            names=["Fuel", "EndUse"],
+        )
         end_use_emissions = cast(
             pd.DataFrame,
-            disaggregated_emissions.groupby(level="EndUse", axis=1).sum(),
+            allowed_end_use_emissions.groupby(level="EndUse", axis=1).sum(),
         )
-        if "Solar" not in end_use_emissions.columns:
-            end_use_emissions["Solar"] = 0.0
+        solar_emissions_total = net_elec_emissions.sum(axis=1) - elec_emissions.sum(
+            axis=1
+        )
+        end_use_emissions["Solar"] = solar_emissions_total
+
         fuel_emissions = cast(
-            pd.DataFrame, disaggregated_emissions.T.groupby(level=["Fuel"]).sum().T
+            pd.DataFrame,
+            disaggregated_emissions.groupby(level="Fuel", axis=1).sum(),
         )
 
         return fuel_emissions, end_use_emissions
@@ -1932,8 +1940,7 @@ def compute_distributions(self, features: pd.DataFrame, results_raw: pd.DataFram
             # Default: sum all segments for the dataset if present
             if dataset in disaggregated.columns.get_level_values("Dataset"):
                 cols = disaggregated.xs(dataset, level="Dataset", axis=1)
-                if dataset == "FuelCost":
-                    # Only count NetElectricity + NaturalGas + Oil for FuelCost
+                if dataset in ("FuelCost", "FuelEmissions"):
                     allowed = [
                         c
                         for c in cols.columns

From 759c316d6bc76915163f1f561a4471a5e243a9b7 Mon Sep 17 00:00:00 2001
From: DARYA GUETTLER <daryag@mit.edu>
Date: Tue, 14 Oct 2025 10:45:40 -0400
Subject: [PATCH 4/6] update inference to correct the dependent sampling
 approach

---
 epengine/models/inference.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/epengine/models/inference.py b/epengine/models/inference.py
index 348420e..5148cba 100644
--- a/epengine/models/inference.py
+++ b/epengine/models/inference.py
@@ -2217,24 +2217,12 @@ def run(  # noqa: C901
         new_features = changed_priors.sample(
             new_features, len(new_features), self.original.generator
         )
-
-        # Create an upgraded spec with the new semantic field context
-        upgraded_spec = SBEMInferenceRequestSpec(
-            **{
-                k: v
-                for k, v in self.original.model_dump().items()
-                if k != "semantic_field_context"
-            },
-            semantic_field_context=self.upgraded_semantic_field_context,
+        new_transformed_features = self.original.source_feature_transform.transform(
+            new_features
         )
-        # Run inference with the upgraded spec
-        new_results = upgraded_spec.run(n)
         # Get peak results for cost calculations
-        new_results_raw = upgraded_spec.predict(
-            upgraded_spec.source_feature_transform.transform(
-                upgraded_spec.make_features(n)[0]
-            )
-        )
+        new_results_raw = self.original.predict(new_transformed_features)
+        new_results = self.original.compute_distributions(new_features, new_results_raw)
         new_results_peak = cast(pd.DataFrame, new_results_raw["Peak"])
         # new_results_energy = cast(pd.DataFrame, new_results_raw["Energy"])
 
@@ -2269,6 +2257,15 @@ def run(  # noqa: C901
         # Compute features for cost calculations after inference
         # For solar upgrades, we need to use the ACTUAL electricity consumption (before solar)
         # to calculate the system size needed, not the net consumption if there is alrearyd some solar
+        upgraded_spec = SBEMInferenceRequestSpec(
+            **{
+                k: v
+                for k, v in self.original.model_dump().items()
+                if k != "semantic_field_context"
+            },
+            semantic_field_context=self.upgraded_semantic_field_context,
+        )
+
         electricity_eui = upgraded_spec._actual_electricity_consumption.sum(axis=1)
 
         # Calculate the feature distributions for solar features (yield, coverage)

From 1f8b32dd06923cc8bb7e8976da5a0474a12a44c4 Mon Sep 17 00:00:00 2001
From: DARYA GUETTLER <daryag@mit.edu>
Date: Wed, 15 Oct 2025 22:57:04 -0400
Subject: [PATCH 5/6] move solar priors to make_priors

---
 epengine/models/data/retrofit-costs.json | 593 +++++++----------------
 epengine/models/inference.py             | 244 ++++++----
 2 files changed, 340 insertions(+), 497 deletions(-)

diff --git a/epengine/models/data/retrofit-costs.json b/epengine/models/data/retrofit-costs.json
index d6c1f1f..3cb8f3c 100644
--- a/epengine/models/data/retrofit-costs.json
+++ b/epengine/models/data/retrofit-costs.json
@@ -13,7 +13,7 @@
                     "error_scale": 0.05,
                     "units": "$/kW",
                     "per": "solar PV capacity",
-                    "description": "PV installation cost per W ($3.04/W)",
+                    "description": "D PV installation cost per W ($3.04/W)",
                     "source": "EnergySage"
                 }
             ]
@@ -31,7 +31,7 @@
                     "error_scale": 0.05,
                     "units": "$/kW",
                     "per": "solar PV capacity",
-                    "description": "PV installation cost per W ($3.04/W)",
+                    "description": "D PV installation cost per W ($3.04/W)",
                     "source": "EnergySage"
                 }
             ]
@@ -39,7 +39,7 @@
         {
             "trigger_column": "OnsiteSolar",
             "initial": "NoSolarPV",
-            "final": "MaxSolarPV",
+            "final": "HighSolarPV",
             "order": ["LinearQuantity"],
             "quantity_factors": [
                 {
@@ -49,7 +49,7 @@
                     "error_scale": 0.05,
                     "units": "$/kW",
                     "per": "solar PV capacity",
-                    "description": "PV installation cost per W ($3.04/W)",
+                    "description": "D PV installation cost per W ($3.04/W)",
                     "source": "EnergySage"
                 }
             ]
@@ -69,7 +69,7 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "gross floor area",
-                    "description": "",
+                    "description": "D",
                     "source": ""
                 }
             ]
@@ -82,7 +82,7 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 8.83,
+                    "coefficient": 100,
                     "indicator_cols": [
                         "feature.geometry.energy_model_conditioned_area",
                         "feature.geometry.est_fp_ratio"
@@ -90,7 +90,7 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "gross floor area",
-                    "description": "",
+                    "description": "D not possible",
                     "source": ""
                 }
             ]
@@ -103,7 +103,7 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 8.83,
+                    "coefficient": 100,
                     "indicator_cols": [
                         "feature.geometry.energy_model_conditioned_area",
                         "feature.geometry.est_fp_ratio"
@@ -111,7 +111,7 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "gross floor area",
-                    "description": "",
+                    "description": "D not possible",
                     "source": ""
                 }
             ]
@@ -120,62 +120,82 @@
             "trigger_column": "Weatherization",
             "initial": "SomewhatLeakyEnvelope",
             "final": "TightEnvelope",
-            "order": ["LinearQuantity"],
+            "order": ["FixedQuantity", "LinearQuantity"],
             "quantity_factors": [
+                {
+                    "type": "FixedQuantity",
+                    "amount": 577,
+                    "error_scale": 0.05,
+                    "description": "Base intercept for air seal cost based on conditioned area",
+                    "source": "D EVS dataset"
+                },
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 6.35,
+                    "coefficient": 1.345,
                     "indicator_cols": [
                         "feature.geometry.energy_model_conditioned_area",
                         "feature.geometry.est_fp_ratio"
                     ],
-                    "error_scale": 0.05,
+                    "error_scale": 0.25,
                     "units": "$/m2",
                     "per": "gross floor area",
-                    "description": "",
+                    "description": "D EVS dataset",
                     "source": ""
                 }
             ]
         },
         {
             "trigger_column": "Weatherization",
-            "initial": "SomewhatLeakyEnvelope",
+            "initial": "TightEnvelope",
             "final": "TightEnvelopeHRV",
-            "order": ["LinearQuantity"],
+            "order": ["FixedQuantity", "LinearQuantity"],
             "quantity_factors": [
+                {
+                    "type": "FixedQuantity",
+                    "amount": 4524,
+                    "error_scale": 0.05,
+                    "description": "2000 ERV 2000 install, EVS intercept",
+                    "source": "D REMDB and homewyse"
+                },
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 6.35,
+                    "coefficient": 1.87,
                     "indicator_cols": [
                         "feature.geometry.energy_model_conditioned_area",
                         "feature.geometry.est_fp_ratio"
                     ],
-                    "error_scale": 0.05,
+                    "error_scale": 0.15,
                     "units": "$/m2",
                     "per": "gross floor area",
-                    "description": "",
+                    "description": "Upper end job. Uses EVS data above .1 ACH drop starting from below .4 ACH",
                     "source": ""
                 }
             ]
         },
         {
-            "trigger_column": "RoofInsulation",
-            "initial": "InsulatedRoof",
-            "final": "HighlyInsulatedRoof",
-            "order": ["LinearQuantity"],
+            "trigger_column": "Weatherization",
+            "initial": "LeakyEnvelope",
+            "final": "SomewhatLeakyEnvelope",
+            "order": ["FixedQuantity", "LinearQuantity"],
             "quantity_factors": [
+                {
+                    "type": "FixedQuantity",
+                    "amount": 577,
+                    "error_scale": 0.05,
+                    "description": "Base intercept for air seal cost based on conditioned area",
+                    "source": "EVS dataset"
+                },
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 129.17,
+                    "coefficient": 1.345,
                     "indicator_cols": [
-                        "feature.geometry.computed.roof_surface_area",
-                        "feature.geometry.roof_is_flat.num",
+                        "feature.geometry.energy_model_conditioned_area",
                         "feature.geometry.est_fp_ratio"
                     ],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "units": "$/m2",
-                    "per": "roof surface area",
-                    "description": "",
+                    "per": "gross floor area",
+                    "description": "D EVS dataset",
                     "source": ""
                 }
             ]
@@ -188,79 +208,87 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 129.17,
+                    "coefficient": 26.9,
                     "indicator_cols": [
-                        "feature.geometry.computed.roof_surface_area",
-                        "feature.geometry.roof_is_flat.num",
+                        "feature.geometry.computed.footprint_area",
+                        "feature.geometry.roof_is_attic.num",
                         "feature.geometry.est_fp_ratio"
                     ],
-                    "error_scale": 0.05,
+                    "error_scale": 0.2,
                     "units": "$/m2",
                     "per": "roof surface area",
                     "description": "",
                     "source": ""
-                },
+                }
+            ]
+        },
+        {
+            "trigger_column": "RoofInsulation",
+            "initial": "InsulatedRoof",
+            "final": "HighlyInsulatedRoof",
+            "order": ["LinearQuantity"],
+            "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 142.73,
+                    "coefficient": 127,
                     "indicator_cols": [
                         "feature.geometry.computed.roof_surface_area",
-                        "feature.geometry.roof_is_attic.num",
+                        "feature.geometry.roof_is_flat.num",
                         "feature.geometry.est_fp_ratio"
                     ],
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "roof surface area",
-                    "description": "",
+                    "description": "D 4in xps or similar, air barrier, roof membrane, cover board, blown inside rafters",
                     "source": ""
                 }
             ]
         },
-
         {
-            "trigger_column": "Walls",
-            "initial": "UninsulatedWalls",
-            "final": "FullInsulationWallsCavity",
+            "trigger_column": "RoofInsulation",
+            "initial": "UninsulatedRoof",
+            "final": "HighlyInsulatedRoof",
             "order": ["LinearQuantity"],
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 7.21,
+                    "coefficient": 127,
                     "indicator_cols": [
-                        "feature.geometry.computed.whole_bldg_facade_area",
-                        "feature.geometry.est_uniform_linear_scaling_factor"
+                        "feature.geometry.computed.roof_surface_area",
+                        "feature.geometry.roof_is_flat.num",
+                        "feature.geometry.est_fp_ratio"
                     ],
                     "error_scale": 0.05,
                     "units": "$/m2",
-                    "per": "facade area",
-                    "description": "",
-                    "source": ""
+                    "per": "roof surface area",
+                    "description": "D 4in xps or similar, air barrier, roof membrane, cover board, blown inside rafters",
+                    "source": "RSMeans"
                 },
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 30.76,
+                    "coefficient": 142.73,
                     "indicator_cols": [
-                        "feature.geometry.computed.total_linear_facade_distance",
-                        "feature.geometry.est_uniform_linear_scaling_factor"
+                        "feature.geometry.computed.roof_surface_area",
+                        "feature.geometry.roof_is_attic.num",
+                        "feature.geometry.est_fp_ratio"
                     ],
                     "error_scale": 0.05,
-                    "units": "$/m",
-                    "per": "total linear facade distance",
-                    "description": "",
+                    "units": "$/m2",
+                    "per": "roof surface area",
+                    "description": "D Closed cell spray foam and drywall",
                     "source": ""
                 }
             ]
         },
-
         {
             "trigger_column": "Walls",
             "initial": "UninsulatedWalls",
-            "final": "SIP",
+            "final": "FullInsulationWallsCavity",
             "order": ["LinearQuantity"],
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 9.21,
+                    "coefficient": 25,
                     "indicator_cols": [
                         "feature.geometry.computed.whole_bldg_facade_area",
                         "feature.geometry.est_uniform_linear_scaling_factor"
@@ -268,21 +296,8 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "facade area",
-                    "description": "",
-                    "source": ""
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 30.76,
-                    "indicator_cols": [
-                        "feature.geometry.computed.total_linear_facade_distance",
-                        "feature.geometry.est_uniform_linear_scaling_factor"
-                    ],
-                    "error_scale": 0.05,
-                    "units": "$/m",
-                    "per": "total linear facade distance",
-                    "description": "",
-                    "source": ""
+                    "description": "D",
+                    "source": "EVS dataset cross verified RSMeans and LBNL Study"
                 }
             ]
         },
@@ -294,7 +309,7 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 188.9,
+                    "coefficient": 197,
                     "indicator_cols": [
                         "feature.geometry.computed.whole_bldg_facade_area",
                         "feature.geometry.est_uniform_linear_scaling_factor"
@@ -302,21 +317,8 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "facade area",
-                    "description": "",
-                    "source": ""
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 30.76,
-                    "indicator_cols": [
-                        "feature.geometry.computed.total_linear_facade_distance",
-                        "feature.geometry.est_uniform_linear_scaling_factor"
-                    ],
-                    "error_scale": 0.05,
-                    "units": "$/m",
-                    "per": "total linear facade distance",
-                    "description": "",
-                    "source": ""
+                    "description": "D inside cavity plus exterior 25 plus 172",
+                    "source": "EVS dataset cross verified RSMeans and LBNL Study"
                 }
             ]
         },
@@ -328,29 +330,16 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 14.42,
+                    "coefficient": 45,
                     "indicator_cols": [
                         "feature.geometry.computed.whole_bldg_facade_area",
                         "feature.geometry.est_uniform_linear_scaling_factor"
                     ],
-                    "error_scale": 0.05,
+                    "error_scale": 0.25,
                     "units": "$/m2",
                     "per": "facade area",
-                    "description": "",
-                    "source": ""
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 61.52,
-                    "indicator_cols": [
-                        "feature.geometry.computed.total_linear_facade_distance",
-                        "feature.geometry.est_uniform_linear_scaling_factor"
-                    ],
-                    "error_scale": 0.05,
-                    "units": "$/m",
-                    "per": "total linear facade distance",
-                    "description": "",
-                    "source": ""
+                    "description": "D inside cavity plus remove 25 plus 20, uncertainty",
+                    "source": "EVS dataset cross verified RSMeans and LBNL Study"
                 }
             ]
         },
@@ -362,29 +351,16 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 196.11,
+                    "coefficient": 217,
                     "indicator_cols": [
                         "feature.geometry.computed.whole_bldg_facade_area",
                         "feature.geometry.est_uniform_linear_scaling_factor"
                     ],
-                    "error_scale": 0.05,
+                    "error_scale": 0.25,
                     "units": "$/m2",
                     "per": "facade area",
-                    "description": "",
-                    "source": ""
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 61.52,
-                    "indicator_cols": [
-                        "feature.geometry.computed.total_linear_facade_distance",
-                        "feature.geometry.est_uniform_linear_scaling_factor"
-                    ],
-                    "error_scale": 0.05,
-                    "units": "$/m",
-                    "per": "total linear facade distance",
-                    "description": "",
-                    "source": ""
+                    "description": "D inside cavity plus exterior plus remove 25 plus 172 plus 20, uncertainty",
+                    "source": "EVS dataset cross verified RSMeans and LBNL Study"
                 }
             ]
         },
@@ -396,7 +372,7 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 181.69,
+                    "coefficient": 172,
                     "indicator_cols": [
                         "feature.geometry.computed.whole_bldg_facade_area",
                         "feature.geometry.est_uniform_linear_scaling_factor"
@@ -404,50 +380,50 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "facade area",
-                    "description": "",
-                    "source": ""
+                    "description": "D",
+                    "source": "LBNL study"
                 }
             ]
         },
         {
-            "trigger_column": "AtticFloorInsulation",
-            "initial": "NoInsulation",
-            "final": "HighlyInsulated",
+            "trigger_column": "Walls",
+            "initial": "FullInsulationWalls",
+            "final": "SIP",
             "order": ["LinearQuantity"],
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 16.58,
+                    "coefficient": 215,
                     "indicator_cols": [
-                        "feature.geometry.computed.footprint_area",
-                        "feature.geometry.est_fp_ratio"
+                        "feature.geometry.computed.whole_bldg_facade_area",
+                        "feature.geometry.est_uniform_linear_scaling_factor"
                     ],
                     "error_scale": 0.05,
                     "units": "$/m2",
-                    "per": "footprint area",
-                    "description": "",
-                    "source": ""
+                    "per": "facade area",
+                    "description": "D",
+                    "source": "Assumed 10 dollars per sqft SIP itself"
                 }
             ]
         },
         {
-            "trigger_column": "AtticFloorInsulation",
-            "initial": "Insulated",
-            "final": "HighlyInsulated",
+            "trigger_column": "Walls",
+            "initial": "SomeInsulationWalls",
+            "final": "SIP",
             "order": ["LinearQuantity"],
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 9.58,
+                    "coefficient": 215,
                     "indicator_cols": [
-                        "feature.geometry.computed.footprint_area",
-                        "feature.geometry.est_fp_ratio"
+                        "feature.geometry.computed.whole_bldg_facade_area",
+                        "feature.geometry.est_uniform_linear_scaling_factor"
                     ],
                     "error_scale": 0.05,
                     "units": "$/m2",
-                    "per": "footprint area",
-                    "description": "",
-                    "source": ""
+                    "per": "facade area",
+                    "description": "D",
+                    "source": "Assumed 10 dollars per sqft SIP itself"
                 }
             ]
         },
@@ -459,23 +435,23 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 16.58,
+                    "coefficient": 21.52,
                     "indicator_cols": [
                         "feature.geometry.computed.footprint_area",
                         "feature.geometry.est_fp_ratio"
                     ],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "units": "$/m2",
                     "per": "footprint area",
-                    "description": "",
-                    "source": ""
+                    "description": "D Total cost needed to retrofit an attic floor",
+                    "source": "EVS dataset cross verified w LBNL"
                 }
             ]
         },
         {
             "trigger_column": "AtticFloorInsulation",
-            "initial": "NoInsulation",
-            "final": "Insulated",
+            "initial": "Insulated",
+            "final": "HighlyInsulated",
             "order": ["LinearQuantity"],
             "quantity_factors": [
                 {
@@ -488,7 +464,7 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "footprint area",
-                    "description": "",
+                    "description": "D",
                     "source": ""
                 }
             ]
@@ -509,8 +485,8 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "window area",
-                    "description": "",
-                    "source": ""
+                    "description": "D",
+                    "source": "RSMeans, Homewyse"
                 }
             ]
         },
@@ -530,8 +506,8 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "window area",
-                    "description": "",
-                    "source": ""
+                    "description": "D",
+                    "source": "RSMeans, Homewyse"
                 }
             ]
         },
@@ -543,10 +519,10 @@
             "quantity_factors": [
                 {
                     "type": "FixedQuantity",
-                    "amount": 500.0,
+                    "amount": 200.0,
                     "error_scale": 0.05,
-                    "description": "Fixed cost for programmable thermostat",
-                    "source": ""
+                    "description": "D NO C wire Fixed cost for programmable thermostat plus labor",
+                    "source": "RSMeans, general market products"
                 }
             ]
         },
@@ -560,8 +536,8 @@
                     "type": "FixedQuantity",
                     "amount": 4250.0,
                     "error_scale": 0.05,
-                    "description": "Fixed cost for high efficiency equipment",
-                    "source": ""
+                    "description": "D Fixed cost for high efficiency equipment",
+                    "source": "Market products"
                 }
             ]
         },
@@ -573,10 +549,10 @@
             "quantity_factors": [
                 {
                     "type": "FixedQuantity",
-                    "amount": 3000.0,
+                    "amount": 2500.0,
                     "error_scale": 0.05,
-                    "description": "Fixed cost for high efficiency equipment",
-                    "source": ""
+                    "description": "D 50 Gallon unit plus labor",
+                    "source": "RSMeans"
                 }
             ]
         },
@@ -588,9 +564,9 @@
             "quantity_factors": [
                 {
                     "type": "FixedQuantity",
-                    "amount": 3200.0,
+                    "amount": 2700.0,
                     "error_scale": 0.05,
-                    "description": "<lorem>",
+                    "description": "D 50 Gallon unit plus labor",
                     "source": ""
                 }
             ]
@@ -603,9 +579,9 @@
             "quantity_factors": [
                 {
                     "type": "FixedQuantity",
-                    "amount": 3200.0,
+                    "amount": 2700.0,
                     "error_scale": 0.05,
-                    "description": "<lorem>",
+                    "description": "D 50 Gallon unit plus labor",
                     "source": ""
                 }
             ]
@@ -637,7 +613,7 @@
                 },
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 303,
+                    "coefficient": 89,
                     "indicator_cols": [
                         "feature.calculated.heating_capacity_kW"
                     ],
@@ -832,231 +808,27 @@
         {
             "trigger_column": "Cooling",
             "initial": null,
-            "final": "WindowASHP",
-            "order": ["FixedQuantity", "LinearQuantity"],
+            "final": "ASHPCooling",
+            "order": ["FixedQuantity"],
             "quantity_factors": [
                 {
                     "type": "FixedQuantity",
-                    "amount": -79,
+                    "amount": 334,
                     "error_scale": 0.05,
                     "description": "Base intercept for ASHP cost based on capacity and building characteristics",
                     "source": "Heat pump cost model"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 2.996,
-                    "indicator_cols": [
-                        "feature.geometry.energy_model_conditioned_area"
-                    ],
-                    "error_scale": 0.05,
-                    "units": "$/kW",
-                    "per": "heating system",
-                    "description": "Area-based component for ASHP cost based on capacity and building characteristics",
-                    "source": "Heat pump cost model"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 303,
-                    "indicator_cols": [
-                        "feature.calculated.heating_capacity_kW"
-                    ],
-                    "error_scale": 0.05,
-                    "units": "$/kW",
-                    "per": "heating system",
-                    "description": "Heating capacity component for ASHP cost based on capacity and building characteristics",
-                    "source": "Heat pump cost model"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 948,
-                    "indicator_cols": ["feature.location.in_county.Berkshire"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Berkshire",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 924,
-                    "indicator_cols": ["feature.location.in_county.Bristol"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Bristol",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 4777,
-                    "indicator_cols": ["feature.location.in_county.Dukes"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Dukes",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 2864,
-                    "indicator_cols": ["feature.location.in_county.Essex"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Essex",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 456,
-                    "indicator_cols": ["feature.location.in_county.Franklin"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Franklin",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 613,
-                    "indicator_cols": ["feature.location.in_county.Hampden"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Hampden",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 500,
-                    "indicator_cols": ["feature.location.in_county.Hampshire"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Hampshire",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 2497,
-                    "indicator_cols": ["feature.location.in_county.Middlesex"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Middlesex",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 5840,
-                    "indicator_cols": ["feature.location.in_county.Nantucket"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Nantucket",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 2624,
-                    "indicator_cols": ["feature.location.in_county.Norfolk"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Norfolk",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 791,
-                    "indicator_cols": ["feature.location.in_county.Plymouth"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Plymouth",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 3591,
-                    "indicator_cols": ["feature.location.in_county.Suffolk"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Suffolk",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 752,
-                    "indicator_cols": ["feature.location.in_county.Worcester"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Worcester",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 0,
-                    "indicator_cols": ["feature.location.in_county.Barnstable"],
-                    "error_scale": 0.05,
-                    "description": "County factor for Barnstable",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "county factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 438,
-                    "indicator_cols": ["feature.system.has_gas.true"],
-                    "error_scale": 0.05,
-                    "description": "Gas availability factor",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "gas availability factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 0,
-                    "indicator_cols": ["feature.system.has_gas.false"],
-                    "error_scale": 0,
-                    "description": "Gas availability factor",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "gas availability factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 334,
-                    "indicator_cols": ["feature.system.has_cooling.true"],
-                    "error_scale": 0.05,
-                    "description": "Cooling availability factor",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "cooling availability factor"
-                },
-                {
-                    "type": "LinearQuantity",
-                    "coefficient": 0,
-                    "indicator_cols": ["feature.system.has_cooling.false"],
-                    "error_scale": 0,
-                    "description": "Cooling availability factor",
-                    "source": "Heat pump cost model",
-                    "units": "$",
-                    "per": "cooling availability factor"
                 }
             ]
         },
         {
             "trigger_column": "Cooling",
             "initial": null,
-            "final": "ASHPCooling",
+            "final": "WindowASHP",
             "order": ["FixedQuantity"],
             "quantity_factors": [
                 {
                     "type": "FixedQuantity",
-                    "amount": 334,
+                    "amount": 2000,
                     "error_scale": 0.05,
                     "description": "Base intercept for ASHP cost based on capacity and building characteristics",
                     "source": "Heat pump cost model"
@@ -1071,7 +843,7 @@
             "quantity_factors": [
                 {
                     "type": "FixedQuantity",
-                    "amount": 1334,
+                    "amount": 334,
                     "error_scale": 0.05,
                     "description": "Base intercept for GSHP cost based on capacity and building characteristics",
                     "source": "Heat pump cost model"
@@ -1087,29 +859,29 @@
                 {
                     "type": "FixedQuantity",
                     "amount": -79,
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "Base intercept for ASHP cost based on capacity and building characteristics",
                     "source": "Heat pump cost model"
                 },
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 3.496,
+                    "coefficient": 2.996,
                     "indicator_cols": [
                         "feature.geometry.energy_model_conditioned_area"
                     ],
-                    "error_scale": 0.05,
-                    "units": "$/kW",
+                    "error_scale": 0.1,
+                    "units": "$/sqm",
                     "per": "heating system",
                     "description": "Area-based component for ASHP cost based on capacity and building characteristics",
                     "source": "Heat pump cost model"
                 },
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 350,
+                    "coefficient": 378,
                     "indicator_cols": [
                         "feature.calculated.heating_capacity_kW"
                     ],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "units": "$/kW",
                     "per": "heating system",
                     "description": "Heating capacity component for ASHP cost based on capacity and building characteristics",
@@ -1119,7 +891,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 948,
                     "indicator_cols": ["feature.location.in_county.Berkshire"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Berkshire",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1129,7 +901,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 924,
                     "indicator_cols": ["feature.location.in_county.Bristol"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Bristol",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1139,7 +911,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 4777,
                     "indicator_cols": ["feature.location.in_county.Dukes"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Dukes",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1149,7 +921,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 2864,
                     "indicator_cols": ["feature.location.in_county.Essex"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Essex",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1159,7 +931,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 456,
                     "indicator_cols": ["feature.location.in_county.Franklin"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Franklin",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1169,7 +941,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 613,
                     "indicator_cols": ["feature.location.in_county.Hampden"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Hampden",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1179,7 +951,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 500,
                     "indicator_cols": ["feature.location.in_county.Hampshire"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Hampshire",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1189,7 +961,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 2497,
                     "indicator_cols": ["feature.location.in_county.Middlesex"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Middlesex",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1199,7 +971,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 5840,
                     "indicator_cols": ["feature.location.in_county.Nantucket"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Nantucket",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1209,7 +981,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 2624,
                     "indicator_cols": ["feature.location.in_county.Norfolk"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Norfolk",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1219,7 +991,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 791,
                     "indicator_cols": ["feature.location.in_county.Plymouth"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Plymouth",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1229,7 +1001,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 3591,
                     "indicator_cols": ["feature.location.in_county.Suffolk"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Suffolk",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1239,7 +1011,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 752,
                     "indicator_cols": ["feature.location.in_county.Worcester"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Worcester",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1249,7 +1021,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 0,
                     "indicator_cols": ["feature.location.in_county.Barnstable"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "County factor for Barnstable",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1259,7 +1031,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 438,
                     "indicator_cols": ["feature.system.has_gas.true"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "Gas availability factor",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1279,7 +1051,7 @@
                     "type": "LinearQuantity",
                     "coefficient": 334,
                     "indicator_cols": ["feature.system.has_cooling.true"],
-                    "error_scale": 0.05,
+                    "error_scale": 0.1,
                     "description": "Cooling availability factor",
                     "source": "Heat pump cost model",
                     "units": "$",
@@ -1306,7 +1078,7 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 6.46,
+                    "coefficient": 5.5,
                     "indicator_cols": [
                         "feature.geometry.computed.footprint_area",
                         "feature.geometry.est_fp_ratio"
@@ -1314,8 +1086,8 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "footprint area",
-                    "description": "",
-                    "source": ""
+                    "description": "D",
+                    "source": "EVS Dataset for Pipe Tenting, Angi, HomeAdvisor. About 0.2 ft of exposed pipe per footprint"
                 }
             ]
         },
@@ -1327,7 +1099,7 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 32.29,
+                    "coefficient": 16.14,
                     "indicator_cols": [
                         "feature.geometry.computed.footprint_area",
                         "feature.geometry.est_fp_ratio"
@@ -1335,8 +1107,8 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "footprint area",
-                    "description": "",
-                    "source": ""
+                    "description": "D",
+                    "source": "RSMeans new insulated ductwork. About .1 ft of exposed ductwork per footprint"
                 }
             ]
         },
@@ -1348,7 +1120,7 @@
             "quantity_factors": [
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 11.48,
+                    "coefficient": 8.6,
                     "indicator_cols": [
                         "feature.geometry.computed.perimeter",
                         "feature.extra_spaces.basement.exists.num",
@@ -1363,7 +1135,7 @@
                 },
                 {
                     "type": "LinearQuantity",
-                    "coefficient": 32.2917,
+                    "coefficient": 18.7,
                     "indicator_cols": [
                         "feature.geometry.computed.footprint_area",
                         "feature.geometry.est_fp_ratio",
@@ -1373,8 +1145,8 @@
                     "error_scale": 0.05,
                     "units": "$/m2",
                     "per": "footprint area",
-                    "description": "",
-                    "source": ""
+                    "description": "D Basement sills and ceiling 6inch",
+                    "source": "RSMeans and EVS Data"
                 }
             ]
         },
@@ -1396,14 +1168,13 @@
                     "error_scale": 0.05,
                     "units": "$/m",
                     "per": "perimeter",
-                    "description": "",
-                    "source": ""
+                    "description": "Apply 2in XPS to basement wall and knee wall, apply dimple mat ($1), build 4in stud wall ($4) w R11 batt insulation, no drywall. 8ft basement",
+                    "source": "Homewyse, various online market"
                 }
             ]
         }
     ],
     "output_key": "cost",
     "raise_on_duplicate_trigger": true,
-    "create_metadata": false,
-    "metadata_aggregation": null
+    "create_metadata": false
 }
diff --git a/epengine/models/inference.py b/epengine/models/inference.py
index 5148cba..8d5304a 100644
--- a/epengine/models/inference.py
+++ b/epengine/models/inference.py
@@ -1287,6 +1287,70 @@ def make_priors(self):
         prior_dict["feature.fuels.emissions.NaturalGas"] = gas_emissions_prior
         prior_dict["feature.fuels.emissions.Oil"] = oil_emissions_prior
 
+        # --- Solar-related priors ---
+        # Ensure solar priors are part of the main Priors set, so changes to
+        # feature.semantic.OnsiteSolar appear in the dependency graph and are
+        # picked up by select_prior_tree_for_changed_features.
+
+        # Annual yield kWh/kW-year
+        solar_yield_prior = UnconditionalPrior(
+            sampler=ClippedNormalSampler(
+                mean=1100,
+                std=150,
+                clip_min=800,
+                clip_max=1400,
+            )
+        )
+        prior_dict["feature.solar.yield_kWh_per_kW_year"] = solar_yield_prior
+
+        # Panel power density W/m2
+        panel_power_density_prior = UnconditionalPrior(
+            sampler=ClippedNormalSampler(
+                mean=180,
+                std=50,
+                clip_min=120,
+                clip_max=300,
+            )
+        )
+        prior_dict["feature.solar.panel_power_density_w_per_m2"] = (
+            panel_power_density_prior
+        )
+
+        # Upgraded coverage depends on semantic OnsiteSolar choice
+        solar_coverage_prior = ConditionalPrior(
+            source_feature="feature.semantic.OnsiteSolar",
+            fallback_prior=None,
+            conditions=[
+                ConditionalPriorCondition(
+                    match_val="LowSolarPV", sampler=FixedValueSampler(value=0.25)
+                ),
+                ConditionalPriorCondition(
+                    match_val="MedSolarPV", sampler=FixedValueSampler(value=0.50)
+                ),
+                ConditionalPriorCondition(
+                    match_val="MaxSolarPV", sampler=FixedValueSampler(value=1.0)
+                ),
+                ConditionalPriorCondition(
+                    match_val="NoSolarPV", sampler=FixedValueSampler(value=0.0)
+                ),
+                ConditionalPriorCondition(
+                    match_val="ExistingSolarPV", sampler=FixedValueSampler(value=0.0)
+                ),
+            ],
+        )
+        prior_dict["feature.solar.upgraded_coverage"] = solar_coverage_prior
+
+        # Max roof utilization for PV placement
+        max_roof_utilization_prior = UnconditionalPrior(
+            sampler=ClippedNormalSampler(
+                mean=0.75,
+                std=0.05,
+                clip_min=0.6,
+                clip_max=0.9,
+            )
+        )
+        prior_dict["feature.solar.max_roof_utilization"] = max_roof_utilization_prior
+
         # TODO: optionally create the matrix for moving raw values to
         # various energy end uses, fuels, emissions, costs.
 
@@ -1314,44 +1378,74 @@ def generator(self) -> np.random.Generator:
         """The random number generator for the experiment."""
         return np.random.default_rng(42)
 
+    # TODO: remove this function?
     def add_solar_features(self, features: pd.DataFrame) -> pd.DataFrame:
         """Add solar-related features to the features DataFrame."""
-        # Add solar yield as a base feature (Massachusetts average)
-        yield_sampler = ClippedNormalSampler(
-            mean=1100,
-            std=150,
-            clip_min=800,
-            clip_max=1400,
-        )
-        features["feature.solar.yield_kWh_per_kW_year"] = yield_sampler.sample(
-            features, len(features), self.generator
-        )
-        panel_power_density_sampler = ClippedNormalSampler(
-            mean=180,
-            std=50,
-            clip_min=120,
-            clip_max=300,
-        )
-        features["feature.solar.panel_power_density_w_per_m2"] = (
-            panel_power_density_sampler.sample(features, len(features), self.generator)
-        )
+        # Do not overwrite if priors already sampled these columns
+        if "feature.solar.yield_kWh_per_kW_year" not in features.columns:
+            yield_sampler = ClippedNormalSampler(
+                mean=1100,
+                std=150,
+                clip_min=800,
+                clip_max=1400,
+            )
+            features["feature.solar.yield_kWh_per_kW_year"] = yield_sampler.sample(
+                features, len(features), self.generator
+            )
+
+        if "feature.solar.panel_power_density_w_per_m2" not in features.columns:
+            panel_power_density_sampler = ClippedNormalSampler(
+                mean=180,
+                std=50,
+                clip_min=120,
+                clip_max=300,
+            )
+            features["feature.solar.panel_power_density_w_per_m2"] = (
+                panel_power_density_sampler.sample(
+                    features, len(features), self.generator
+                )
+            )
+
         # Set default value for OnsiteSolar if not provided
         if "feature.semantic.OnsiteSolar" not in features.columns:
             features["feature.semantic.OnsiteSolar"] = "NoSolarPV"
-        # Calculate upgraded solar coverage based on semantic field
-        features["feature.solar.upgraded_coverage"] = np.where(
-            features["feature.semantic.OnsiteSolar"] == "LowSolarPV",
-            0.25,
-            np.where(
-                features["feature.semantic.OnsiteSolar"] == "MedSolarPV",
-                0.50,
-                np.where(
-                    features["feature.semantic.OnsiteSolar"] == "MaxSolarPV",
-                    1.0,
-                    0.0,
-                ),
-            ),
-        )
+
+        # Create a consolidated upgraded coverage column for downstream cost logic, if missing
+        if "feature.solar.upgraded_coverage" not in features.columns:
+            coverage_prior = ConditionalPrior(
+                source_feature="feature.semantic.OnsiteSolar",
+                fallback_prior=None,
+                conditions=[
+                    ConditionalPriorCondition(
+                        match_val="LowSolarPV", sampler=FixedValueSampler(value=0.25)
+                    ),
+                    ConditionalPriorCondition(
+                        match_val="MedSolarPV", sampler=FixedValueSampler(value=0.50)
+                    ),
+                    ConditionalPriorCondition(
+                        match_val="MaxSolarPV", sampler=FixedValueSampler(value=1.0)
+                    ),
+                    ConditionalPriorCondition(
+                        match_val="NoSolarPV", sampler=FixedValueSampler(value=0.0)
+                    ),
+                ],
+            )
+            features["feature.solar.upgraded_coverage"] = coverage_prior.sample(
+                features, len(features), self.generator
+            )
+
+        if "feature.solar.max_roof_utilization" not in features.columns:
+            max_roof_utilization_sampler = ClippedNormalSampler(
+                mean=0.75,
+                std=0.05,
+                clip_min=0.6,
+                clip_max=0.9,
+            )
+            features["feature.solar.max_roof_utilization"] = (
+                max_roof_utilization_sampler.sample(
+                    features, len(features), self.generator
+                )
+            )
 
         features["feature.upgrade.solar_pv_kW"] = 0.0
 
@@ -1362,23 +1456,9 @@ def update_max_solar_coverage(
     ) -> pd.DataFrame:
         """Update the MaxSolarPV coverage when electricity consumption data is available."""
         features = features.copy()
-
-        # Set coverage values based on solar type
-        features["feature.solar.upgraded_coverage"] = np.where(
-            features["feature.semantic.OnsiteSolar"] == "MaxSolarPV",
-            features["feature.solar.upgraded_coverage"],
-            np.where(
-                features["feature.semantic.OnsiteSolar"] == "LowSolarPV",
-                0.25,
-                np.where(
-                    features["feature.semantic.OnsiteSolar"] == "MedSolarPV",
-                    0.50,
-                    0.0,
-                ),
-            ),
-        )
-
+        # Start from the existing coverage column; only MaxSolarPV rows are overridden below
         # Handle MaxSolarPV samples - calculate feasible coverage for each
+        base_coverage = features["feature.solar.upgraded_coverage"]
         max_solar_mask = features["feature.semantic.OnsiteSolar"] == "MaxSolarPV"
         if max_solar_mask.any():
             max_feasible = self.calculate_feasible_solar_coverage(
@@ -1386,9 +1466,9 @@ def update_max_solar_coverage(
                 electricity_consumption.loc[max_solar_mask],
             )
             # Use the maximum feasible coverage for each sample
-            features.loc[max_solar_mask, "feature.solar.upgraded_coverage"] = (
-                max_feasible
-            )
+            base_coverage[max_solar_mask] = max_feasible
+
+        features["feature.solar.upgraded_coverage"] = base_coverage
 
         return features
 
@@ -1412,10 +1492,7 @@ def make_features(self, n: int) -> tuple[pd.DataFrame, pd.DataFrame]:
         )
         df = priors.sample(df, n, self.generator)
 
-        # Add solar features
-        df = self.add_solar_features(df)
-
-        # Defer solar upgrade capacity calculation to the post-prediction phase
+        # Solar features are now included in priors; avoid re-sampling here
 
         original_cooling = None
         mask = None
@@ -1451,8 +1528,6 @@ def make_retrofit_cost_features(
 
         safety_factor = 1.2
         raw_capacity_kW = peak_heating_per_m2 * safety_factor
-
-        # Map calculated capacity to nearest available equipment size (unless above max)
         available_sizes_kW = np.array([
             5.3,
             7.0,
@@ -1535,13 +1610,11 @@ def oh_col_name_for_county(county: str) -> str:
             "feature.system.has_cooling.true"
         ]
 
-        # Add solar system size for retrofit cost calculations
         if features["feature.semantic.OnsiteSolar"].iloc[0] in [
             "LowSolarPV",
             "MedSolarPV",
             "MaxSolarPV",
         ]:
-            # Calculate the required solar system size for the upgrade
             electricity_consumption = elect_eui * self.actual_conditioned_area_m2
 
             # Update MaxSolarPV coverage if needed
@@ -1556,7 +1629,6 @@ def oh_col_name_for_county(county: str) -> str:
             cost_features["feature.upgrade.solar_pv_kW"] = required_system_size
 
         else:
-            # No solar upgrade, set to 0
             cost_features["feature.upgrade.solar_pv_kW"] = 0.0
 
         return cost_features
@@ -1795,7 +1867,8 @@ def compute_costs(
         )
         end_use_costs = cast(
             pd.DataFrame,
-            base_end_use_costs.groupby(level="EndUse", axis=1).sum(),
+            # Avoid deprecated axis=1: use transpose-then-groupby pattern
+            base_end_use_costs.T.groupby(level="EndUse").sum().T,
         )
         solar_cost_total = net_elec_costs.sum(axis=1) - elec_costs.sum(axis=1)
         end_use_costs["Solar"] = solar_cost_total
@@ -1847,7 +1920,8 @@ def compute_emissions(
         )
         end_use_emissions = cast(
             pd.DataFrame,
-            allowed_end_use_emissions.groupby(level="EndUse", axis=1).sum(),
+            # Avoid deprecated axis=1: use transpose-then-groupby pattern
+            allowed_end_use_emissions.T.groupby(level="EndUse").sum().T,
         )
         solar_emissions_total = net_elec_emissions.sum(axis=1) - elec_emissions.sum(
             axis=1
@@ -1856,7 +1930,8 @@ def compute_emissions(
 
         fuel_emissions = cast(
             pd.DataFrame,
-            disaggregated_emissions.groupby(level="Fuel", axis=1).sum(),
+            # Avoid deprecated axis=1: use transpose-then-groupby pattern
+            disaggregated_emissions.T.groupby(level="Fuel").sum().T,
         )
 
         return fuel_emissions, end_use_emissions
@@ -2004,13 +2079,12 @@ def calculate_feasible_solar_coverage(
         roof_area_m2 = features["feature.geometry.computed.roof_surface_area"]
 
         # Solar panel assumptions
-        # panel_efficiency = 0.22
         panel_power_density = features["feature.solar.panel_power_density_w_per_m2"]
-        max_roof_utilization = 0.50  # Only 50% of roof can be covered, assuming we have a fire safety boundary. This is a very high level estimate
-        # TODO: Account for roof angle, orientation, and shading
 
-        # Calculate maximum solar capacity possible
-        max_solar_area_m2 = roof_area_m2 * max_roof_utilization
+        # TODO: Account for roof angle, orientation, and shading
+        max_solar_area_m2 = (
+            roof_area_m2 * features["feature.solar.max_roof_utilization"]
+        )
         max_solar_capacity_kW = (max_solar_area_m2 * panel_power_density) / 1000
         max_local_solar_capacity_kW = 25
         mask = max_solar_capacity_kW > max_local_solar_capacity_kW
@@ -2074,7 +2148,6 @@ def apply_solar_to_electricity_consumption(
         """Apply solar generation to electricity consumption to get net consumption."""
         net_consumption = electricity_consumption.copy()
 
-        # Get the OnsiteSolar semantic field value - vectorized approach
         if "feature.semantic.OnsiteSolar" not in features.columns:
             # No solar column, return original consumption
             return net_consumption
@@ -2200,12 +2273,14 @@ def run(  # noqa: C901
         changed_feature_fields, changed_context_fields = self.changed_context_fields
 
         changed_feature_names = set(changed_feature_fields.keys())
+        print("CHANGED FEATURE NAMES", changed_feature_names)
 
         # then we will get the priors that must be re-run as they are downstream
         # of the changed features.
         changed_priors = original_priors.select_prior_tree_for_changed_features(
             changed_feature_names
         )
+        print(changed_priors.sampled_features.keys())
 
         # then we will take the original features and update the changed semantic
         # features.
@@ -2217,14 +2292,18 @@ def run(  # noqa: C901
         new_features = changed_priors.sample(
             new_features, len(new_features), self.original.generator
         )
+        print(new_features.columns)
         new_transformed_features = self.original.source_feature_transform.transform(
             new_features
         )
         # Get peak results for cost calculations
         new_results_raw = self.original.predict(new_transformed_features)
-        new_results = self.original.compute_distributions(new_features, new_results_raw)
+        new_results_energy = cast(pd.DataFrame, new_results_raw["Energy"])
+
+        new_results = self.original.compute_distributions(
+            new_features, new_results_energy
+        )
         new_results_peak = cast(pd.DataFrame, new_results_raw["Peak"])
-        # new_results_energy = cast(pd.DataFrame, new_results_raw["Energy"])
 
         # finally, we compute the deltas and the corresponding summary
         # statistics.
@@ -2256,23 +2335,16 @@ def run(  # noqa: C901
 
         # Compute features for cost calculations after inference
         # For solar upgrades, we need to use the ACTUAL electricity consumption (before solar)
-        # to calculate the system size needed, not the net consumption if there is alrearyd some solar
-        upgraded_spec = SBEMInferenceRequestSpec(
-            **{
-                k: v
-                for k, v in self.original.model_dump().items()
-                if k != "semantic_field_context"
-            },
-            semantic_field_context=self.upgraded_semantic_field_context,
-        )
+        # to calculate the system size needed, not the net consumption if there is already some solar
 
-        electricity_eui = upgraded_spec._actual_electricity_consumption.sum(axis=1)
+        # TODO: update the sampling to occur immediately before the inference runs
 
-        # Calculate the feature distributions for solar features (yield, coverage)
-        new_features_with_solar = upgraded_spec.add_solar_features(new_features)
+        electricity_eui = self.original._actual_electricity_consumption.sum(axis=1)
 
-        features_for_costs = upgraded_spec.make_retrofit_cost_features(
-            new_features_with_solar, new_results_peak, electricity_eui
+        # Solar features (yield, coverage) are already sampled by the priors,
+        # so new_features is passed to the cost-feature builder unchanged
+        features_for_costs = self.original.make_retrofit_cost_features(
+            new_features, new_results_peak, electricity_eui
         )
 
         retrofit_costs = self.compute_retrofit_costs(features_for_costs, cost_config)

From ba264a5a5c06ed543a8d41a107bbcca50290129c Mon Sep 17 00:00:00 2001
From: DARYA GUETTLER <daryag@mit.edu>
Date: Thu, 16 Oct 2025 09:14:15 -0400
Subject: [PATCH 6/6] update max solar cost

---
 epengine/models/data/retrofit-costs.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/epengine/models/data/retrofit-costs.json b/epengine/models/data/retrofit-costs.json
index 3cb8f3c..d8d599c 100644
--- a/epengine/models/data/retrofit-costs.json
+++ b/epengine/models/data/retrofit-costs.json
@@ -39,7 +39,7 @@
         {
             "trigger_column": "OnsiteSolar",
             "initial": "NoSolarPV",
-            "final": "HighSolarPV",
+            "final": "MaxSolarPV",
             "order": ["LinearQuantity"],
             "quantity_factors": [
                 {