From 9ab326d2329e462420326d99de9835ff87a81476 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Wed, 11 Mar 2026 16:41:37 -0400 Subject: [PATCH 1/5] push --- .../services/data_readers/json_reader.py | 9 +- .../services/data_readers/xpt_reader.py | 1 + .../data_services/excel_data_service.py | 18 +- tests/resources/Datasets_whitespace.json | 470 ++++++++++++++++++ tests/resources/Datasets_whitespace.xlsx | Bin 0 -> 20920 bytes tests/unit/test_json_reader.py | 21 + .../test_excel_data_service.py | 44 ++ 7 files changed, 554 insertions(+), 9 deletions(-) create mode 100644 tests/resources/Datasets_whitespace.json create mode 100644 tests/resources/Datasets_whitespace.xlsx create mode 100644 tests/unit/test_json_reader.py diff --git a/cdisc_rules_engine/services/data_readers/json_reader.py b/cdisc_rules_engine/services/data_readers/json_reader.py index fb80530f7..3186be1f3 100644 --- a/cdisc_rules_engine/services/data_readers/json_reader.py +++ b/cdisc_rules_engine/services/data_readers/json_reader.py @@ -10,7 +10,7 @@ def from_file(self, file_path): try: with open(file_path, "r", encoding=self.encoding) as fp: json_data = load(fp) - return json_data + return self._strip_dataset_keys(json_data) except (UnicodeDecodeError, UnicodeError) as e: raise InvalidJSONFormat( f"\n Error reading JSON from: {file_path}" @@ -23,5 +23,12 @@ def from_file(self, file_path): f"\n {type(e).__name__}: {e}" ) + def _strip_dataset_keys(self, json_data: dict) -> dict: + for dataset in json_data.get("datasets", []): + records = dataset.get("records", {}) + stripped = {k.strip(): v for k, v in records.items()} + dataset["records"] = stripped + return json_data + def read(self, data): pass diff --git a/cdisc_rules_engine/services/data_readers/xpt_reader.py b/cdisc_rules_engine/services/data_readers/xpt_reader.py index d20e1e85d..668086b0a 100644 --- a/cdisc_rules_engine/services/data_readers/xpt_reader.py +++ b/cdisc_rules_engine/services/data_readers/xpt_reader.py @@ -50,4 +50,5 @@ def from_file(self, file_path): return self._read_pandas(file_path) def _format_floats(self, dataframe: pd.DataFrame) -> pd.DataFrame: + dataframe.columns = dataframe.columns.str.strip() return dataframe.applymap(lambda x: round(x, 15) if isinstance(x, float) else x) diff --git a/cdisc_rules_engine/services/data_services/excel_data_service.py b/cdisc_rules_engine/services/data_services/excel_data_service.py index 9127aeb89..325e09616 100644 --- a/cdisc_rules_engine/services/data_services/excel_data_service.py +++ b/cdisc_rules_engine/services/data_services/excel_data_service.py @@ -105,6 +105,7 @@ def get_dataset(self, dataset_name: str, **params) -> DatasetInterface: false_values=["False", "FALSE", "false", False, 0, "0"], ) dataframe = dataframe.replace({nan: None}) + dataframe.columns = dataframe.columns.str.strip() dataset = PandasDataset(dataframe) return dataset @@ -117,12 +118,14 @@ def _get_dataset_name( @functools.lru_cache(maxsize=None) def _get_datasets_worksheet(self) -> pd.DataFrame: - return pd.read_excel( + df = pd.read_excel( self.dataset_path, sheet_name=ExcelDataSheets.DATASETS_SHEET_NAME.value, na_values=[""], keep_default_na=False, ) + df.columns = df.columns.str.strip() + return df @cached_dataset(DatasetTypes.RAW_METADATA.value) def get_raw_dataset_metadata( @@ -170,22 +173,20 @@ def get_variables_metadata(self, dataset_name: str, **params) -> DatasetInterfac na_values=[""], keep_default_na=False, ) + row0 = [v.strip() for v in dataframe.iloc[0].tolist()] metadata_to_return: VariableMetadataContainer = VariableMetadataContainer( { - "variable_names": dataframe.iloc[0].tolist(), + "variable_names": row0, "variable_labels": dataframe.iloc[1].tolist(), "variable_formats": [""] * dataframe.shape[1], "variable_name_to_label_map": dict( - zip(dataframe.iloc[0].tolist(), dataframe.iloc[1].tolist()) + zip(row0, dataframe.iloc[1].tolist()) ), "variable_name_to_data_type_map": dict( - zip(dataframe.iloc[0].tolist(), dataframe.iloc[2].tolist()) + zip(row0, dataframe.iloc[2].tolist()) ), "variable_name_to_size_map": dict( - zip( - dataframe.iloc[0].tolist(), - dataframe.iloc[3].tolist(), - ) + zip(row0, dataframe.iloc[3].tolist()) ), "number_of_variables": dataframe.shape[1], } @@ -220,6 +221,7 @@ def get_datasets(self) -> List[dict]: na_values=[""], keep_default_na=False, ) + worksheet.columns = worksheet.columns.str.strip() except ExcelTestDataError: raise except Exception as e: diff --git a/tests/resources/Datasets_whitespace.json b/tests/resources/Datasets_whitespace.json new file mode 100644 index 000000000..f467e059a --- /dev/null +++ b/tests/resources/Datasets_whitespace.json @@ -0,0 +1,470 @@ +{ + "datasets": [ + { + "filename": "ex.xpt", + "label": "Exposure", + "domain": "EX", + "variables": [ + { + "name": "STUDYID", + "label": "Study Identifier", + "type": "Char", + "length": 10 + }, + { + "name": "DOMAIN ", + "label": "Domain Abbreviation", + "type": "Char", + "length": 2 + }, + { + "name": "USUBJID", + "label": "Unique Subject Identifier ", + "type": "Char", + "length": 20 + }, + { + "name": "EXSEQ", + "label": "Sequence Number", + "type": "Num", + "length": 8 + }, + { + "name": "EXTRT", + "label": "Name of Actual Treatment ", + "type": "Char", + "length": 20 + }, + { + "name": "EXDOSE", + "label": "Dose per Administration ", + "type": "Num", + "length": 8 + }, + { + "name": "EXDOSU", + "label": "Dose Units ", + "type": "Char", + "length": 20 + }, + { + "name": "EXDOSFRM", + "label": "Dose Form ", + "type": "Char", + "length": 20 + }, + { + "name": "EXDOSFRQ", + "label": "Dosing Frequency Per Interval ", + "type": "Char", + "length": 20 + }, + { + "name": "EXROUTE", + "label": "Route of Administration ", + "type": "Char", + "length": 20 + }, + { + "name": "EXLOT", + "label": "Lot Number ", + "type": "Char", + "length": 20 + }, + { + "name": "RPHASE", + "label": "Reproductive Phase ", + "type": "Char", + "length": 20 + }, + { + "name": "RPPLDY", + "label": "Planned Repro Phase Day", + "type": "Num", + "length": 8 + }, + { + "name": "RPPLSTDY", + "label": "Planned Repro Phase Day of Obs Start ", + "type": "Num", + "length": 8 + }, + { + "name": "RPPLENDY", + "label": "Planned Repro Phase Day of Obs End ", + "type": "Num", + "length": 8 + }, + { + "name": "EXRPDY", + "label": "Actual Repro Phase Day of Observation ", + "type": "Num", + "length": 8 + }, + { + "name": "EXRPSTDY", + "label": "Actual Repro Phase Day of Obs Start", + "type": "Num", + "length": 8 + }, + { + "name": "EXRPENDY", + "label": "Actual Repro Phase Day of Obs End", + "type": "Num", + "length": 8 + }, + { + "name": "EXSTDTC", + "label": "Start Date/Time of Treatment ", + "type": "Char", + "length": 20 + } + ], + "records": { + "STUDYID": [ + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 " + ], + "DOMAIN ": ["EX", "EX", "EX", "EX", "EX", "EX"], + "USUBJID": [ + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 " + ], + "EXSEQ": [1, 2, 3, 4, 5, 6], + "EXTRT": ["DRUG A", "DRUG A", "DRUG A", "DRUG A", "DRUG A", "DRUG A"], + "EXDOSE": [1, 1, 1, 1, 1, 1], + "EXDOSU": ["mg", "mg", "mg", "mg", "mg", "mg"], + "EXDOSFRM": [ + "POWDER", + "POWDER", + "POWDER", + "POWDER", + "POWDER", + "POWDER" + ], + "EXDOSFRQ": ["", "", "", "", "", ""], + "EXROUTE": ["ORAL", "ORAL", "ORAL", "ORAL", "ORAL", "ORAL"], + "EXLOT": ["", "", "", "", "", ""], + "RPHASE": [ + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION " + ], + "RPPLDY": [1.01, 2, 3, 4, -5, 0], + "RPPLSTDY": [1, 2.6, 3, 4, 5, 6], + "RPPLENDY": [1, 2, 1.1, 4, 5, 6], + "EXRPDY": [1, 2, 3, -2.2, 5, 6], + "EXRPSTDY": [1, 2, 3, 4, 2.3, 6], + "EXRPENDY": [1, 2, 3, 4, 5, -4.1], + "EXSTDTC": [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06" + ] + } + }, + { + "filename": "lb.xpt", + "label": "Laboratory", + "domain": "LB", + "variables": [ + { + "name": "STUDYID", + "label": "Study Identifier", + "type": "Char", + "length": 10 + }, + { + "name": "DOMAIN ", + "label": "Domain Abbreviation", + "type": "Char", + "length": 2 + }, + { + "name": "USUBJID", + "label": "Unique Subject Identifier ", + "type": "Char", + "length": 20 + }, + { + "name": "LBSEQ", + "label": "Sequence Number", + "type": "Num", + "length": 8 + }, + { + "name": "LBTESTCD", + "label": "Lab Test or Examination Short Name ", + "type": "Char", + "length": 8 + }, + { + "name": "LBTEST", + "label": "Lab Test or Examination Name ", + "type": "Char", + "length": 40 + }, + { + "name": "LBORRES", + "label": "Result or Findings as Collected ", + "type": "Char", + "length": 20 + }, + { + "name": "RPHASE", + "label": "Reproductive Phase ", + "type": "Char", + "length": 20 + }, + { + "name": "RPPLDY", + "label": "Planned Repro Phase Day", + "type": "Num", + "length": 8 + }, + { + "name": "RPPLSTDY", + "label": "Planned Repro Phase Day of Obs Start ", + "type": "Num", + "length": 8 + }, + { + "name": "RPPLENDY", + "label": "Planned Repro Phase Day of Obs End ", + "type": "Num", + "length": 8 + }, + { + "name": "LBRPDY", + "label": "Actual Repro Phase Day of Observation ", + "type": "Num", + "length": 8 + }, + { + "name": "LBRPSTDY", + "label": "Actual Repro Phase Day of Obs Start", + "type": "Num", + "length": 8 + }, + { + "name": "LBRPENDY", + "label": "Actual Repro Phase Day of Obs End", + "type": "Num", + "length": 8 + }, + { + "name": "LBSTDTC", + "label": "Start Date/Time of Treatment ", + "type": "Char", + "length": 20 + } + ], + "records": { + "STUDYID ": [ + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 " + ], + "DOMAIN ": ["LB", "LB", "LB", "LB", "LB", "LB"], + "USUBJID": [ + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 " + ], + "LBSEQ": [1, 2, 3, 4, 5, 6], + "LBTESTCD": ["GLUC", "GLUC", "GLUC", "GLUC", "GLUC", "GLUC"], + "LBTEST": [ + "Glucose", + "Glucose", + "Glucose", + "Glucose", + "Glucose", + "Glucose" + ], + "LBORRES": [ + "POSITIVE", + "POSITIVE", + "POSITIVE", + "POSITIVE", + "POSITIVE", + "POSITIVE" + ], + "RPHASE": [ + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION " + ], + "RPPLDY": [1.01, 2, 3, 4, -5, -5], + "RPPLSTDY": [1, 2.6, 3, 4, 5, 5], + "RPPLENDY": [1, 2, 1.1, 4, 5, 5], + "LBRPDY": [1, 2, 3, -2.2, 5, 5], + "LBRPSTDY": [1, 2, 3, 4, 2.3, 5], + "LBRPENDY": [1, 2, 3, 4, 5, 5.7], + "LBSTDTC": [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06" + ] + } + }, + { + "filename": "ds.xpt", + "label": "Disposition", + "domain": "DS", + "variables": [ + { + "name": "STUDYID", + "label": "Study Identifier", + "type": "Char", + "length": 10 + }, + { + "name": "DOMAIN ", + "label": "Domain Abbreviation", + "type": "Char", + "length": 2 + }, + { + "name": "USUBJID", + "label": "Unique Subject Identifier ", + "type": "Char", + "length": 20 + }, + { + "name": "DSSEQ", + "label": "Sequence Number", + "type": "Num", + "length": 8 + }, + { + "name": "DSTERM", + "label": "Reported Term for the Disposition Event ", + "type": "Char", + "length": 100 + }, + { + "name": "RPHASE", + "label": "Reproductive Phase ", + "type": "Char", + "length": 20 + }, + { + "name": "RPPLDY", + "label": "Planned Repro Phase Day", + "type": "Num", + "length": 8 + }, + { + "name": "RPPLSTDY", + "label": "Planned Repro Phase Day of Obs Start ", + "type": "Num", + "length": 8 + }, + { + "name": "RPPLENDY", + "label": "Planned Repro Phase Day of Obs End ", + "type": "Num", + "length": 8 + }, + { + "name": "DSRPDY", + "label": "Actual Repro Phase Day of Observation ", + "type": "Num", + "length": 8 + }, + { + "name": "DSRPSTDY", + "label": "Actual Repro Phase Day of Obs Start", + "type": "Num", + "length": 8 + }, + { + "name": "DSRPENDY", + "label": "Actual Repro Phase Day of Obs End", + "type": "Num", + "length": 8 + }, + { + "name": "DSSTDTC", + "label": "Start Date/Time of Treatment ", + "type": "Char", + "length": 20 + } + ], + "records": { + "STUDYID": [ + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 ", + "EFD111 " + ], + "DOMAIN ": ["DS", "DS", "DS", "DS", "DS", "DS"], + "USUBJID": [ + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 ", + "EFD111-0001 " + ], + "DSSEQ": [1, 2, 3, 4, 5, 6], + "DSTERM": ["aa", "bb", "cc", "dd", "ee", "ee"], + "RPHASE": [ + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION ", + "GESTATION " + ], + "RPPLDY": [1.01, 2, 3, 4, -5, -6], + "RPPLSTDY": [1, 2.6, 3, 4, 5, 6], + "RPPLENDY": [1, 2, 1.1, 4, 5, 6], + "DSRPDY": [1, 2, 3, -2.2, 5, 6], + "DSRPSTDY": [1, 2, 3, 4, 2.3, 6], + "DSRPENDY": [1, 2, 3, 4, 5, 6.1], + "DSSTDTC": [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-04", + "2023-01-05", + "2023-01-06" + ] + } + } + ], + "standard": { + "product": "sendig", + "version": "3-1" + }, + "codelists": [] +} diff --git a/tests/resources/Datasets_whitespace.xlsx b/tests/resources/Datasets_whitespace.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2401994962258ceeb7052a4784e081ad6c2b3929 GIT binary patch literal 20920 zcmeFZbC_gLvo6}UHEr9rZClf}ZQC}cJ#F{2ZQI7QZJ(a`zPr!fcmMV||J*^ zs$45qy^#@-5pU)yc`0BJ6aX*)2mk;80)V~!j`1u&0DzXSkH}x&XbIWcI2qeGeOGq3 zGj`OWb+fh_Pmz)8<3|X&4EQ8CQ^}~z4P(CF9#Vi-y=pZnnV5YyFfBXpa;>iWR7K9qUe!rR1VnzJ2O+9bqKaFPfed>`C!E^Kf&c zN2lY_Lo1*`t025l%QDpPM@-{_XT6ZtrVrS)PII6eO-}>jJAR~dJErz8!d<|$Vs8!( zPqQ4oK)b$C9aDhTUwD-i0qZR_E@ zU??Ufhg=Z{@jxIoxC;J#Njt8aoq2Fh2%BWXs6ZmFHYqdCAH(Yxrw$l5{)Q z8KZYVhV*wmw)D8d*7rwI0jInsT#u}3*Btlsdy~2Xw*mxD7o-#s$bc-9xi-~asrTk) zaKuCh(daYrUVlQXveWW=rfAB8yybh|h>G3(AX<`7KXhj?)qwz>7HxBO^<)Qh%{xi> zG7|0N1Dud8O!*1sfk|-)E=S;e-lz8Gc2+i-YV$D<+mRw7s<>j|$vBOtxD~;ZTb6v) zydvVB8lTbQu+djSLttDo&G+1+OUoHWMNyLA4B+hF&u8qQ4ef;@j>x|2>7rhT^zDrB zCF(YGBl(XEwLFHCYEx`TlbdxfUxN7WqH^Nn@aX+&A^#E=?pJN-?C4}`t!`~a_b;KD zI~iLu{L%iu(hHjEi2!1N0o!id=1N-6o^(TqKX%hVYY%VUZggT~8ZcMNAb)v4lcSDKJR^lkp^`)&Gx~vDk;E9wso~m4bmFXI>-1sG|EOOkWQ&jY0yh7gE_w6G z3LEJ9Byxd9KZ=LAx}i?4-Rm`;)I7`@OAgkq=k_b3Y#qAQ@_xv7<4X_zJ9_ODGQz(9 z3wp-?jNYP;{tLZl-ld+?_pAqc$?!QTjD1#7=nhOV32DPIbxHFrpW+fUFfK{KEjpG^ z&$I~C#T2)ja1{kYtTf?S zT}u~aurXAUC}T!>I917XvFwVrRmA(IDn4f)mLJ|sDoa?yI$eOy9)$^ST{3;!IJhG; zfCX=ec^kcgJ4nATrNbkwZo>?^p+ih9wURvrentDdR=}uT8l3P1fI? zTUhj(nHJbF$oJ;`IdvRle5~fU!9#8LClM3hkn=XugLl=mx1f!y>S%BW_z|L(7NwLf zL3!?8F1mK${|SEitJ?70*XVnF>HZ&||G#zrKeXN}PR2Hn9zpmM>IMgfKn4JiV}DgV z+2eSD#j+;ImBy3e`5E?I-1DbL=9ni_#)Ffy;|&;rX0I%Y1}zf5OyEE*GIVAGQc351 zH2gk6O>o*eL{#);fBzX4ML|w?X&Og4un1$8xkEx!E#^M$6#Oh;xj=0KAKGT+f z%F~QWV^RI}8o^F&++4o((t%+p7B3~;tvuf~8qAxBk_p!&IZAp#c4UVi91)#3;&gCv zbWjTrS|@8vEi_&ir@}k{D9M{ep#nUGuN}0rt>gsWEMiKFMgG`jYZYc#4US~&)c8hQ zqh0eikFoDuUn2PLCejgPKos}Yvi4Q}yO91GhgA$0TrWKWXm!ABBF)kCF0((B!U51A zdqh#i(hV)%7B0Aef09Rz39buqyEFEJ7>?vkb`P>CCU?3`$RG- z8qib?xA{c|e`(g&egaBvm#aqe#vXN@S@g3&wWo}3S08GYz0`OP8&$PbVuCXKee%I>?{NdeXyb@!G<@_yt zV-s|#VPL@1)fPG;-%7`5B>89m^8*`-NJKudmSSC`o;}TUKcgx8?cUp7)XUD&907ff zc-E6bGvoQrFtd0V(Gt8X6;sisp3TS0HE^*Mc5qUV)2jn9yqg@{uwQ6bwe%c2{AjJgl!+Nf@J`{S`Y$fe_9S;FuVEvj_Gee@L-mE8+G5uZy^m z`-LcK6QdAoTfwt*w%pbxa{xR2x_&3)3kn`u8mbXJi4btSAq6CNr?7%RRQH_Na2DKX zlSO~|eZmN8P{^JmzZZJWL40p%?*yZ9Tk>dcvLa#zzK^%LP^Dek7G*Gz>lk?#@)1OR z`R2Ci0bLArdldf*W?6}cnOli%QkYhT!G8Oyq5kymo=Rtv?JNC4(53+qfmsy}mno0qu%4hlhtdw?^m#Tp`)k?}s1IzqkOGDa^Yb$WvX|iB-PS;th8&exoIL zz>WoH`lbV0tYig`_7DVveYu)RO^S;V-x7y-r|kKe&n-C| zixLp>rv00M`w}ZmEl^IlqubF_vOgHUv69ev4zV2{CS5hY7Tu| z-$?cDC&@nDvr#Il20vJG(Bvw+s< zwF3^$&GH7$uI;2RWN{7A8`gmPuuWH^$2?ZpuGXD+Zwh59?13#YF@phy{wbdN&y%XL z60)nXt^%X3nDrR!&Nw@XQ&k1VhCyGj2SQ=5OJLFbAoj z7!YET4nv4+QVrjxBV)oYG*sJ~It$VQysPq>BZ%x|{4H6ceoZb#2aGN+$3DCFkT@bX zYJh2_m*G(%exrGxCMijVOlQ0c1mFOqPH7A}B^v_hxI(InAJ}x&0TL-E4%Cz0MF9PF z^n==|E4W8WmI7Z6AjR*eI}Ishl2Y0LT%Q1>I0R|^8>T|AII87$v%uSrI z@eFJ-R3_Z8ve_0qr$<8a)v}$$u>p&UC=NgwQWsv(JYhaR_yuX|^bBO?TXFjZ4a&Do z7k;uFtY|Wn!fnmHa;kxHlqExy4(>|w?@(6TgKm&m$>V|JLHHEb_|L5~Oy&IObf~no z+T@EAQ4sljfUqo4tZ1g7@@lF679+KTOZa!l&Bu)p>!!rsMJzTW;gn!%yiGRx+BEXe zAf_7#)J*QEX2zszlW=q z)MXZ|oD{E?@jQHHo_zl(*0ipcc&r^ttw$zfQ}?VY7Oybe+=kcO3(>*96|0Z{h$dHH z5_}gFWNlKoD6U{~gSSTc&WbdTD|gR0y0|4EBxp$?knD-Xv%u_>k3xG|Wu!s0^Q^-< z;JnGKK4)aisIYv%xo)n-c86#~4@Pl>;}pye?1DU17)n-@lB2toYdT!@8gSsA?k29l zxv>Ha8+o=ez?#X;;oex9V~2IIa7%qwadeMn!hLKsD4HIuDZOAjr#F2tnlVzLeSxwz z0n|=^!PbRvy=R$>*dU7qvn*(KeO<1X09&XR@BmRCH7ZX>3T$lPKw*y0P(gLiHz$AW z$0-nmXKZ=*&Ft#;`JiNVZC6r3duNcA~n(J%cS1(?fa)4}z^JZ+Irtz}63S+|vTNJ? zsPZP*D?CuTEC(?_4LA&o58k08O#jQL^p=+n#93?}X5 z1*Tvy&BM@pw;i_}#!Vz3cSRimOJKyv+*AXE6AMVBhe}QFpTCVRzyqC#5A0AGkE`=U zyAH_K0(#Ru2ODsu%ZT;yrj%{gZdhU2lbfH=AnF4u)p z2gk8if4Hc}?>_GC7btb#<3RN}%-qGy^qV6zbD(^agp-fgD^T;8W~k;_`qu)LCL9OarPKDjaG)_n-MXSK7)>?#viJ*Qr_mz9Y@?O$if{@ zwNn|z6Lk(tl*bhL%@DhX$_M)q`5oac#Tz5GY^RyT;Wh9CQ|};+z~0B+>)lj`)7ow1 zk9Hd%cY%J6ajwu1NmmajX((IU_J8$y3wiT zMIm-y!$-ltB|m^}q*2tCYwUSi85|MV>7R(o_`tQ5FElC0c3HDDM&X&Ky777O^u+g% z!IrL^Zc@`%%g9$j|0@4Aq?UKEwR8NNO~OR?Kz)Ub!e3kp!UHaHXf?rx zMczs>X$1a98wV^C{BP^WT>3D!m|D|1`do7a@tn1s3$&`#L~4Ql{h!Xj$eQ>e?>3w( zP4;uG`v$v+Q;>nfLbi-vKE~1uYBSs80K6V1Bsx{v%cp1mCiT z|1;LV;qiaPI;W~tjt)xoFRY7#^;9w_0&lLV95774wl1M^=nGgS?DQJL-!RCDr_T91 zg4gaVEae$EEH;LN(86|Va;IFZ*PCct(%gEbdVdt6_tTLaX*`}d$!DA;^@A;y^Z3l^*=3)Cb<_@6%T59k_M2U6Y3CMl+@X=3_>B!Bk)rwv#fzM~kFBX6t6y;cA?Lx% z)QP}B9Pld+>Sh%IJS>1*8of67Xl)9I2lV$m z1b-fYXxN+`1iMS15F`U=&;vQr#1{28Dz4z^+qD>AXqZ#LR@r56idbWDuP6HA0YtXW zdKBqV`@u3tvI8HaH}fWc3S!;le%$EuiLhEtunzLNsXa-yLGvmUH-&}H2+`%#!b6C@ zUodcmRs|YGXB2Q@=J$n5Io|a@_0QSi|w3BRB#+#GQ+)F0g2(-ncDx!c31j6b#!NM77i93G8T%+%iGBv?3-vP zTQnsVHengE?%)w+KwZ(V42M)`N42Sc4Mmkw^ zr=eWO+x7b8uLf1YcLc3O@aJ}rvJ-v|5f&B0oy=)l|uEg*@&$qLEDluk=^%xDKHC@0_K^D$zKb1nZ zVj9008d(*zn#@ZYxuJsL-3*1^IWH!ro#Vqe1TkXpYH^VSr9Ds^8M@e1tQRiTTUBsP zlI6epn1L>xi$EDm09*;LM%HS2%;AOF>eelPCS=?E;=J|ICQHpM;d_ekFb}2e%jGu6 z5<_qo5J^7_{O;i-m&+3d7{I8gH90QJphnO$-y@@vW< z2!^jS-2NZl)YaC((!kc%@;}(K_GB5$Ao?%jIrsy7q(gKg)~BX5RdLy}Em$`&2CqVb zm}za%cdIARMPI@bfzCgDn=AHGI$`5O>sJpMyH$mR2(~C8K;l|Hg9_zlES)PnA-`g= zd?ia<1g`v9{aWS*RYSxRQAH*kuqs1ZM8;3g1f*NchTdrxJ-3nuS*qP>l{%$o2^IWA z@>R)4Eg_?C89jO8!CWZXpUbY=&$iZDDKc)6joBBIaHl17te4B2jt{pXO zmbq-PQd&&1we4;Sb$I_M@WvRi$&@e8vjhtOfbuVaJDTY`7#k@$Ihfm+I{wvIH7(mr zdK91BhEKig9Drlvxg>fLhGWK=-e#+f0>EGTKwN#KlArHzBpH-z$47aSF~Q_iZW^^i!O2CLBVy2y66z8blSjQC`Mob^9lmVe1PSAe4fM|o z*L~}(70%3r`jajx0?FXpt;2%4&I@QBXh@2d6(}=Tb2ug&H{v2Res1F=LZ9y5j0)nE z`?xys;8&fhZ)z)VYI`V*ja9`tYI!)eg@Qf3>NP?^kB*)wh|rN%L0Ok&FwV-O!i&pV zq81Jv+gGP=<2`CSRTk$@%2>&dYZ1D?7Fy5+hsRO@o=`!SDuBEU`yzQN@{8g(1T=wL zxJzX0)d<*=B%!@vKV7>3dVAt7>>KQO#f%eryxAO_g1aE% za*Ht%Cn7e^0x!emK0@}V9o_a#Qdm4)!K;3RDtJUbj{k5O3CcncgJynnNe+A3&-&4< zBc?jMDKg%!Cf zLy9@pXf{2>8UO)wN;O#>mG}3WTRMBHPBFn=u?opz_Kqm+vNnDcS*R3PpOU}%zpEbv zUo)Z`_eSRyjyYkSoWjCkpTnhb)JKNz^~ynS-)Y7khjT0CHqYH=&?(;-u`ZyzHSQ@I zem_nzr~HhtG^&JQt|m>bMz5}$K*m>3kMY+_7NlJ~;n{(0K2fQG+FO=$T5eDeRKZS1 zvy9C|qe3@RuQ1Ib;E<%yZf+O(i0^*D7!;}7qaVuowfa*qD01qjX?sXoM3qmIp;C{| z7%nIwP^G(0Uu%ed<=}~;luL~w2b)0g#y2)+weD7xQiU|DxpUX^=2?p~ix+)mVLJm# zM7~&~O(N8E0A_UyN^8uk?Svf4Rs1PAiM~LzRMjIn65_0Dv!3{^K|uo!qUA z{~GX__IKL?aTs5|<4)jR9%FIA3}FJ#3B1B&Rc~{()+I=Skt47$uvw`Gr>*xhKp=C1 zqKW>B&V)wGL z-cq1)KpUpAWSQ)e$bGT!3MUz|y!6?`TTx`RJssl>#mIaqwT$yF2N^j-ckpmx+%SxF zTqMo?d9d)o+{S@A*fsL}q{aIluZYstZKz8esRl*;r=wN;udxh$pof>mjZIEK#b}B- zJgZ`{z1>bgQhcz=`#t{gzCPUN`ZHoRQcQB@t!h7iMdY4U&3yzBrx-+QS{*K^?7J5n zn1x%ZPIX;vi``cLjDjwW_JEy5?{j{2G1nwtc0w&@5pFlg{h?BycPtE+iDHu1?-Kes z=N9W2F^7`h>r{v4g(arFD@#84GY2vrDInjnmsB0C8oH%vn2BtDrwd4Tu zhg+WO7*n0ZN1>zDPUbIn7FRlIG`_eSz>SyJ-|Yrgb{F=B!lBd9b5oW}K^5WgwNDV1 zT%T!`F=qg3GRazlZKEk2k=6F=S)kfx3wX&E&O3(Lnyg0kZ%%=ik2g-f5NiH6PKzN? z`X&Xq<5k=l$FRz{TN1U=z4%1wviPK?p@X!lyI5HfBj9Qs=5M7>9}MgkCT4Ma#Vbi2 zVA(yZshr-l0eW~cZm%_Y?jm_}Br!BsO6<^Du)CzI8Q!dk{Ok)zIg@!< zbjR=dUHBB;l6G_8Nf$?vBhQ`Zmam1U?3?T9p$z{)4J#Y5h0;agqx2S%nVTFZa-F&R zcE8{-Ww%OpCAuM69o z-9Lo7rp4a{PT&=N<6Z*i@d`ii@4-v?mQU_^|9nR;ef$pQ(X|tmB>W#IL6{vN@`B*2 zBjSoVvdBEn{imO>qt+1<{=9KR(pdz?Za`rGV7WfXq@6Ab1A5d$2RToBwZ%@pli^Ea z`m11I_S7ZBiuLTy(|-S5kY$-c4`ub;o)A!9{Q#;e z5SDNFE)UyrVzlR*Fww-1S^o2~i-HTk4Gz{*Hy#t$7hv8q7;v7FJq{OFhNG#Jm_ubJ z*UfN-aq6|n=?DdP)2s;kI6}&^%<@ll9A_DBE2ePbc5v*&61^TJI5*o@yR%C-eGEW( zmIv#lV-BHA*m0mbR0S$TCy#b`om;3t(;rSGqGW_4I^v@sh7Gb&q*3R zD%18fh>V%Kdhy8bF?EjNvn2k!ca3ZtXnaQdP_H|ELH*z_vvh#=#aA)m>&Upj2~Mso z0Qp$Ro21NJE&2S%urE86xdG{G4f_P?UyFEO(}k0nv9G&z;spCp;3K-TGG#8U*09?_RKG8vrO?HF3BZ< zdsxY5+bV*#B;6*W2mSfWSR&9WF@j((y@ijyb;L#RIB-1j%c*l)_t#&i?XPx+wk(z8 zF158ET|2T#WDq%G-KN|*-mV8-`S5e9v{xo?Zf?@ryle6T79Q`u9hkYeD2)*TzysCH zR~1n>He|glp*8{5#p|x!6X0uFHRaXx&{X(Nk;(xH8?~!{=oLTr0Ocpl~ z%k?yKzhJ;%z5{7=oe1aS&YBsu>IOu@IC05k=5*w6T(}hXGtL(~NDh$qoa+e=bVgq6 zTUl7dI(B4ZlrP;p5&%TfdLaiBT~U}DfI|EBifR!y>Xl;;*7q~{ACDpYj#7FlzlVyx z`gHMZ>&F4hfg<&#}r6V`gJr z8$t(n0cK|NMa{#(Wxee{6I#JTE>VsMA>XbX4Kck``n{%tv(eOK&Px*wRA#Jc&G=Mf zi3zjV2AST?Uf040xH{0tgkwh`xb8X@oUA2(AKEHT#*H#5X6Cvi#>-I}kxUr{_XOei zh>FM3Fd=`&)zC2@A>&*?bV7OAF(iemf8HMC-j*7}XiHi$zB0F-84Ye23)-ILi71cC zZ9?Lvb67jn9F?~TF1k|V%aTr^?YfzT11E)77Q68n(?f$xp3KUoO~%T6q@)7pNT{m$ zsRy(6q5!rm{u!(SQ-T2Vxk{;rw^oI!KnyhJEe;J6pXVO{+JM&#R-SFoL_}d{ZIc34 zdRE1FRAj*UD9*@IG{@9&BEk3AQdbi)gLd$K`j$c!^4J*&Y@kqWtAZVht?4p^oyav^ z>xsvu;QpFsUlG8PCLd*`u4;R~Vug~$sG6(17fkt(Eb-hmS(?z$(xLk1yXl?Nv|5#wVWn{Y?A7xy$H)fiyfx|%d(4n$3S=A~wP?9?t8&jG3@A~abhx}w zS#7%x#oQ@KTZ&E1XqlpWWNv3~2weqixQK@{-L{~1-{*hi?QE@Q+1Wq<0PLUu0RGUP zUoP}NlN1%ITDAr3C_dlKKY?$!Kpn?KiV6_;wMx&G&6~2lF*)>*%KhT)k@9EW9yvnd zibq^foY^d3hj_gXPo_FQW>H$2^72jE?+`l~<3-1UN-DHpjxOCR;Nw-L6pM)uN$zK2 z7LUdza(%UJTikT)KFR?3HD1j~j9FNtI5lPh=GEHsNh*xAQG^4?Q$%$jC1*WEN~sCsa4uBZ(mVcqL!%_z7K1RM!_-aQ7&TE(amq_&|5b(fNJuJVDxK*!S8 z8-IvXC0w7^?FUdhK(zH#EkZoqZsTRrt9Ov7b26#a2w!EG-a!3`o;ak8jCcT!t4&6=HClS zY}7u-QLEu~=pL&t@|?Y+TE?!{c=_EMi|*I4e0LXrs?o{>Bjg=qs1=)q5Guzkl^rT6vBvm(%x%17 zKpF#JslpoObet&C2&G)AI$e_CXU>O6EkD7VeI)a@I8HMrd5Wjr3x$ zH=?Y*W#}6n3k@~hl_G1#bA|Pi+xdRvI)QS5wC#O>$m>>x#W@NjsIjw|Fhk?-Srq*A z>z*<+1$ISGV{Ja_XJ5qOX1vG0!rp+*GX-bQ5-oG|ESBFf%_WY^oAHqe*M??q>^@e- zEoa!A!X>S@G1xM=RCncuR>6O*_APk=nK`@@Ja~E*960WKfBO*JXfykGC+Spncfbou z9J?p0U)j%fzpG@^f3p;ngVOy5-;Ih@)gA#7OtjGvlhOtI_4aWdkHWPM4zqi-! z5MWZJ+JbE+`tWJ^RJV&4kgO^3QCdg+0h2}z7cQ;rYKE&#o|hh`;+@<~)iQ!I=G>$r z&wMyVx>p2D{seW4*A9{`I^{eW9eO**uX+qc*>Bjt_gUH5n+mclaU{WRwb( zc(#>m5Zk5C=o4|8oc<^Hd{>pZIFssZ187>j!-_@~$|gva46o#(V0UuK!HSQk7s6rN z>9|G>wv8u8X)*Iun|!O#b3xXO)vBhfi>WxC@AADLrdAoFM_KdpKW-@e^shC<{qjBq zUzz=%<-dRX7)LW>V<$(tzt!JZEh}-v=4<&cL5VWZ%|gcPns4l&TKsu!sdCdJU08FKD7D-SqC(aMq)+AKb>F zs=`0Yre|*E>cgg{$!15&k3UY&OLPpOEQx_ma(exjH7FQSuyY3Zo0H>jDsXOFR`Y=b zau2->DIwROd7Zu*5dzS5_1by8UbtQUYrp$fMb!`fxn0bx-!`=sn!P7bh)ktM%dGN} z4dNw^hB&ztXP(HeE%;cWL_NP8#pj;r7|kiN?wcU2iR*PHQqLbXa_#Mjbsl_CNiP_| zsWkAmA(Pit71z1y5|Aq#iOzN)%6(I%QW>X?Y#V$vn+NYvvWtbzV&TAB`jM;J2;m~4 zcn6Lh1y}5*j}E1JS94biQ})JJMR3v#G9VyvQNlFQgZAnI7|4@n%-2OEKAERiMTxl> zr_O~bY_(x5DVE< z{F;)-LNX-6!R#nM%~U3B>ceTWDvWJkUOZT7#c2r19`vNR+L-E@?92SV9Y8CA>JyYG zyV!vB^=usFhbX!!9kE7DH(FW8L5!iv2aa~L>DkV=p(PQD6|&N&%VI7`{aA%8!G0>@ z5TF!mH!%ywBPE(>Pv`;K^iuA@c}fnk2zixc1jaVQCI85OAWhEmQZaI&S1Kf?O4|DY zXtqNDPpCAc>;nGlUDtQ{d|k?)MMJ5lw|;^Kd@h;LY@M%wjjTd^<#CnuqwCf3%BZQU z4L&6$LnurazNoTy*>*$E796=jUHiW z3;qEoXmjXBXqJ^!_E@v`#U|=vr;;$&5OL&&m_q{N^=mVjEG9&fy#DDv^5@ouM#;dt z0976EX`wk(hA;!kG=$QqiPAJ4UHJ$O$>eSE{wTBjw3^+W&-3$~f$iDi0zeLao;_$~ zhpppa)=vS)Z|P{5>!nc(>w6^|^h=uC#&ok0MfP5O%&D+A5G@@c`el+(ZjAYJ(@%f~ z0`{@bZJJFN;(sm6Wvvle-zhSzQF*rO;mfjybJ?P6y(()igcuJ3u&H?|I#(E^FT27D zi)sebN5#ZVgeFrDrdG1-?Fn@jzbq@A5t<7F?g}>9hq}nf#h}Qx<}9!#6PfyrHq*6^ z`e@4y8@WLuZC4^I=-BG5U`&MKI!H*RUr=Q_l=zde$g1V&^{^zUgG(!MI>yqlNq-Nc zuV38m2Y=8YKK2A0t^fsyzVXFWxAg5+c?q-p3Z!QzZ;1N6&f7!+66Gevr`bH6#tWt` zjBeV@^Tt_M!1_FHhXymWvOWr-9jRu>X|T=!e8)!Y4Rx!`uDc6Yfw0pgmiKR`ytP^cM^G+m&bIbi820zPlvS$glzy|Gv#G{m0-<6pZn~{6 zd%3`YAf^)05sB<(ZJKg5(Q9w!!jyT~Zm)Bebw^PGYHSEDW8-*#-qU9;>IYZ&Cb)*_ z@B)*wNwd|Zgx2JkMLA0x^-rKu1O2E7s(XsU~?E1@UykI?e_sJQE&eB7rTJ zhFDfi*1NvZTVq^G>{aJr`U8$E444LJ@RDlb$=_t3hMj2LWhttV1(tA8vixdYfP%r{ zqq(#a*)rN4&z+K(gR>pkYcNoT&G*14$!T$oQCQrhB&~Wbw-y8WM!I$KlN3fSJ+Q;V zcN;XhorX9kTVn*2)rZK+A*3;PY{Io{_&B$@OLY0RDO`+UQIHLD@niNNU;Zg{ts*Rt zu>|kgYrt?cC9=vd2L<~rk^$170|=H8Aj3{Fz`Bzk<3ivx%cn2ANpES|GE@ebY7*IK7zUK{G z-oRC5K+_?G#ovULq99HEkcrOjAAeJ^zqS>Xc51WxHRRumMxo^; zk&mF_&>&og&mqgoW2-6s=MUQefeJKMx5Rjqd(3<$VQZIt~exQ&yUETcQj*D^*L&O^GgI?cKKHm4wfI&zUgJi729?s&-{?tCN= zj@~2ir(anFemr6Y@L&F7xPdXb{OV)_8Kl({AHwNT9O1}D;H!L{X(RGgWWI_J!j?|n zxu4}4tdzCV(fhdQZ`L||2<@Kr9AopTx;j&hwc$b0Cf2(8_>O4oGZ{SV!kYM5QA(^* z>p}%Kng;LRt1f+s#E!Ax=dfEYJ^tUkw9IyCI=c5*&PsI6X$8&xp12v&>oMlY1U}L%178hAZSGliO0GYC1mSW z$wQ$<-rQzNQ`Du@C^BkHU|FKJ>-)~LGZ@!41B+$TFzo%e3tEPCZ$}2I)*X`rn=nBq zp`asCbWxjrtnhURwcyb31k^vhTuKzR|3Q{viPpUHO1+M5&(^Bn7me0(305k`Ci-0r z_(K+@J|D|Q^&*5^m`ym$CP;r@v^{<}+D_$ZRAA&wG&-d1{yZv@?v+LpEYR!Cx33lN zZV#GySgh+1-*R@-1y5$8FBaC^BdR<5D~>`kliLWOW?&KXv38-hZ~ycJ8yXY>tDj=n zZ0ChjWK@)(LJR$kFNiEplft$tUCrnSUfBNih|fW+a}ZX7^Y;f%(1c$`gY+RJ%%Ot; z$^jz-n@EaiweUd145|XwhWM}#p%!h_G^kwW+D%|&U@YO5-l#480$bWkFT@i6UdY5A zT?TU5;nv{iSCsVcptC(y$99z-CFCy*>lF#$k7Fs2aJ8_nl_SMVc`u_&6xio1A3KK-udXGbX!Z?6}rsO;_|{ zXcFOb!kmAi{qe1I2sKsJEWIZ+c7tb(Nt^%N19lShAZQ{T@KA6JVj*|o&evm|lNU?% z&nx|0e4i*|bQ3;%Jd3vNMRn z7;0%1Fz8wxoQ5mczZ!KlO|?CYbZA$;cg9xQWY@kkT<#$y7KO)TbM6Jl*z4;I)rtYZ z({ML1DBR}DuFk~ARQhgUNwbn#G@V9m#40d#6Vrbs1sI;3gWWF?TLuo6yF5#y6joJn;78C4)wG7E58jc zV8+Z5~-2KMeMn04z)DE9b;wr))=tmEZV&J5{&nyO$s3j z2?Q%*j`+{LtslE+cC3P~Md;Z(P?Ea)!*Ev8R;-#p?5s z=*YW?E8re4U9M6439BC&lh%H?wJb{-_e~b@YFSaa;ED+K5pqKGjlvNh*gHjlpxoC9 z!D<`jRfK#cK1L9uH=JlIkuXoW>IWLlG%dB|Xw1^!dDU9bgg>6qC}S&a3^bVk?uAK|vi;hOH*^X7fCI8&_>5R$u2(|NiE>KOS2Ls$f2+SstGn{Eay=wF4^m-Bhq}$i8qD?-9=JwFCj=#=V!)sba`ZXyrbiv6< zG^ur22RS=c(;f-Z9R%Rua#6OdoS4qOyU!}JpIYM(64x7!FCA~ydkzkyJ7s^$q#2E3 zpBef3e^Y*`jZ1hNm!$iX6s%w@e$xIrGE$lCx;B@dCVfYd+pWmmjPiBTP)4$Co{C`q zDfN;@ttuhCQil$8lp56GTTklEQ&qX~08V#ACK?0|nqLjmJ9D-TnM8R^Ih63h#IN=Y zVQhL**IXe6KP80f>sT0&;Y!J^aj&JC*Avqd!eQ<6>$Mu6O{RN;dh}I3SS74G-7Ne( zIEIA}#>F?mbcTDXr2ZKvCwvmTT7VauB_lSs`eMGTJJxmLn-^>QCZb)gw*w{*9c$yf zIm5dADJUEe!x4leVmDcY$pXccHvu7HNLA);P<^}EA*LT=&_6#3@#TGK@G0K&MEMyb z8n7vTqa1^PMBM=aabbe=Uz(YC7+ApU=~}>Wy&Yvpj37~NKtm{dARzo~KtX7JKox^( zeh>rnxAa9qIO=8+Q>`h8^y`kX0QARU`Rc&}6vSm765{OVH-zSTFu?1_Z|2EG>y$2O zO_ETL+q^N@yW$l1(}^lx$%fb*1M!a7E&XGad*aAqtL(D_@w_oGC&d!w75l2SeQU&y zbeL8N^!?x?Y0qrJP~sS}p}5uBXq}{r&6cs)6ne>tHBcz_F+{0uG>vu1q9d9lr%4rR zbpjnX{qB@pzxvkt-?%y?6y2~d`fB`AUj@?O;d^IuOvgt+Cy{r`EX|sTAoYKT2mMT2 zO6+-y6k6;_VNx%05vN7nB-bc6%1*eKWQ|H^OL}e#B%d@IaAD71#!vJuR!@tK*kA&# zGC-{wMM*?J{d(Pcq#1;=`yMSC3-jRuBpp{KI7=>4tuNK;>3`%4qFcN zdMTgLl5(e(+o=?%eckxaz57tOge%Ha>ju9r_5mlK<03gtwnDGxujPY98VIgfh7&`< z+HW+_=cS~@o#Nu&3N2?GAQt}fYR4^JCXi+Dxu(!4Fsd92^I;cv8okpU31NUn>0&R- z%uVe)IkOYCS7-N>hrU&~e#t}cG%5S9w~-E!L6HD7n}|P0bsceLs4q zKOWE(_SBJ>>Ha}m?h_@Zt@ZodLpYmI#k@;fnV=PqdclQXvinkfCJb=4L zm(&uRN6aismRwt;&Im2f#nJ@B_l`bjcDiM)TpjtkFITM{V_>I7R z4n8mjwA)Q}TT&^E_sS_-5oSa^aWD`7cZl;P4s_Y0jI-4DC5XG^6A?nLOB@55A{O01 zp3ILgC$?`|ana15ETgeg@{OjE+(`iZh70n4C5(J>2P!(&(rp`b=YfuVeSV`tO*r@Q z<_tUF1VV&b93zlr%nHz?L~tJ81Gb^s832xFy^q|)4 zyfZMrgao|mLT05j5J-pJ)%ArA$1S~W?|+(j63Tko`y#vL3IE4wnB`y7 z@W|I+2mIOn+Zi~$5ue>(#Nr|-L2*tXFPU0T7;A_~Q&mV^bhp9fk^$-fiKr=dl!s3l;$aDT>Yv zM=+7$w+L*;YmRmIwybgXZ@^TWNv}P>)D414b-ozewBNJSGz5-^^j0bofbgo+ENL~t z?DE`tZ=EU8TQd#p;3X|u)S2Sy8~6sK$uL_uNk}wxhb^@+Qz*YXBto>~qWe~dSeILl z&&YBbP$oUJf^id8SbxvmT`uG78OU)?d{Rp@8o{a7d3OVs`D%zuw3^<-*YpI}Wb#yW z(5BKDYfR@pbqTYRT7MI}t~rkxrNcQW>i=d_PY72Qut~PU98Q6CknP7yT=71W)!>n( z(PqY$Gb0xT0S_fy4%2I|-UE_EdBiLt;p;qY-WpiP$~`14;N9fy8Inb$JOZ zt7Md+~hY97*wB!q@s>xNV;#1Qvq?)B5R-TmjByBQtjskk;A*V;baG1Y$eZHV1OS8C}WuT7d$ z7xSn?HQ^IY^^@-o&aAHzh+^0D$daG4(6h>kGW!q=S|yRlE_0AayOGkk^6iQDGGb#&p+9u zwn)DGbUcprg0wLp>_wBZY}ygEMP`kL3bicucIvueWm!Cpibzph8=k_k)*A?V&gFQr z>x?+b6IF4PZw^nisP~8WW=}eN3Uw6{Jh#0P$9Kjlhj^`+_FE%(lv^oB70x7BJcHr9-RJ?a7wW^YF|mAz0;l+R4?UDd;&96*DLMHh|Ly- zwjv*&4Ac+-MOM7$Xv`$-jw;5Qa#Lip9PjN@tG=2PA$BeH)*!1lXugbp5oK>N&3bB% z?T8hSbKMRm9{rki9kE@-U54D(zzTGKm4Awu)t=ARxqQD0gFO~#Z`4gD zoh?z#-(FK~TtMOwm%0YNBWL@>aTSpy138ywMrYVy^v>tqCanFhT{DrnO4~G9IZ%bZ zwrf-KN(aZZyoA?1xFJQRPWO7smXQhwi5-m59J} z&UQh`l-n@)t${3eqUGY&OU(#hw%9N9oHJiX`v&KI9e2~?ca>j-Ev*BLU!S7VcUGg8 z#}&7tpeE?qwrqR&O-qI-gEq?Jb@d_KiDelNeAiQgE#g_)ZRd7f?Wuh@8L)YX`7pqD ze>Rn}Su9(AizahGlm6-uTho7wb3yZECUas#!l4fxZojtXum?whw4rP6?ZcxiQX`BukpG^T}-llM|3fRXT-u6e4fo4FLJ z`?(wU$(u^Cb>2}KUAWcpq^Ne`nvZ>}+7mT$ja2K${LzojW%@X~;K&)BCOem;-4>=5eRCRy18r+ZjEYaC7z*w#ICk8Dg_?Nz&c948a7m~S0? z)o#)#B6iBySuC7Xm$?S@Ma*}7amZ?}FVasj!ge0`ywXLH|F^+K><7D!m;n74`hJ0% zA7Or!qZN#=w?_^a=lIWn@cf0HnmfjxyHCaRv+?Y{9mFL^tE{( zDfwD1#P0!MiW97^gGSHq-?g(-MwS5yi`YB8Y06{cuFp*K<}QY!*|qcCMDU*<23 zNqsdzix$kuLeCmKK*JXJB%nD2CY;ikCCphxKq Date: Wed, 11 Mar 2026 16:52:06 -0400 Subject: [PATCH 2/5] xpt --- cdisc_rules_engine/services/data_readers/xpt_reader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdisc_rules_engine/services/data_readers/xpt_reader.py b/cdisc_rules_engine/services/data_readers/xpt_reader.py index 668086b0a..d20e1e85d 100644 --- a/cdisc_rules_engine/services/data_readers/xpt_reader.py +++ b/cdisc_rules_engine/services/data_readers/xpt_reader.py @@ -50,5 +50,4 @@ def from_file(self, file_path): return self._read_pandas(file_path) def _format_floats(self, dataframe: pd.DataFrame) -> pd.DataFrame: - dataframe.columns = dataframe.columns.str.strip() return dataframe.applymap(lambda x: round(x, 15) if isinstance(x, float) else x) From 51075d5f2a4f44dbc1e1860128cb0285a095ae7a Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 12 Mar 2026 09:32:34 -0400 Subject: [PATCH 3/5] reset singleton --- .../test_data_service/test_excel_data_service.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/test_services/test_data_service/test_excel_data_service.py b/tests/unit/test_services/test_data_service/test_excel_data_service.py index 39407ebf7..8e47ac275 100644 --- a/tests/unit/test_services/test_data_service/test_excel_data_service.py +++ b/tests/unit/test_services/test_data_service/test_excel_data_service.py @@ -15,6 +15,13 @@ from cdisc_rules_engine.models.dataset import PandasDataset +@pytest.fixture(autouse=True) +def reset_excel_data_service(): + ExcelDataService._instance = None + yield + ExcelDataService._instance = None + + @pytest.mark.parametrize( "dataset_name", ("ecaa.xpt", "ecbb.xpt", "suppec.xpt"), From 07bff3089d3f3bdc5a96f3e7d4e76c3858aaf187 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 12 Mar 2026 10:56:50 -0400 Subject: [PATCH 4/5] shift behavior to raise error --- .../services/data_readers/json_reader.py | 22 ++++++++++--- .../data_services/excel_data_service.py | 24 ++++++++------ tests/unit/test_json_reader.py | 11 ++++--- .../test_excel_data_service.py | 32 +++---------------- 4 files changed, 42 insertions(+), 47 deletions(-) diff --git a/cdisc_rules_engine/services/data_readers/json_reader.py b/cdisc_rules_engine/services/data_readers/json_reader.py index 3186be1f3..8d7586151 100644 --- a/cdisc_rules_engine/services/data_readers/json_reader.py +++ b/cdisc_rules_engine/services/data_readers/json_reader.py @@ -10,7 +10,10 @@ def from_file(self, file_path): try: with open(file_path, "r", encoding=self.encoding) as fp: json_data = load(fp) - return self._strip_dataset_keys(json_data) + self._detect_whitespace_in_dataset_keys(json_data, file_path) + return json_data + except InvalidJSONFormat: + raise except (UnicodeDecodeError, UnicodeError) as e: raise InvalidJSONFormat( f"\n Error reading JSON from: {file_path}" @@ -23,12 +26,21 @@ def from_file(self, file_path): f"\n {type(e).__name__}: {e}" ) - def _strip_dataset_keys(self, json_data: dict) -> dict: + def _detect_whitespace_in_dataset_keys(self, json_data: dict, file_path: str): + offending = [] for dataset in json_data.get("datasets", []): + dataset_name = dataset.get("filename") records = dataset.get("records", {}) - stripped = {k.strip(): v for k, v in records.items()} - dataset["records"] = stripped - return json_data + for key in records: + if key != key.strip(): + offending.append(f" dataset '{dataset_name}': {repr(key)}") + if offending: + offending_list = "\n".join(offending) + raise InvalidJSONFormat( + f"\n Error reading JSON from: {file_path}" + f"\n The following column keys contain leading/trailing whitespace:" + f"\n{offending_list}" + ) def read(self, data): pass diff --git a/cdisc_rules_engine/services/data_services/excel_data_service.py b/cdisc_rules_engine/services/data_services/excel_data_service.py index 325e09616..3b3e2224a 100644 --- a/cdisc_rules_engine/services/data_services/excel_data_service.py +++ b/cdisc_rules_engine/services/data_services/excel_data_service.py @@ -105,7 +105,12 @@ def get_dataset(self, dataset_name: str, **params) -> DatasetInterface: false_values=["False", "FALSE", "false", False, 0, "0"], ) dataframe = dataframe.replace({nan: None}) - dataframe.columns = dataframe.columns.str.strip() + offending = [col for col in dataframe.columns if col != col.strip()] + if offending: + raise ExcelTestDataError( + f"Sheet '{dataset_name}' has column headers with leading/trailing whitespace: " + f"{[repr(c) for c in offending]}." + ) dataset = PandasDataset(dataframe) return dataset @@ -118,14 +123,12 @@ def _get_dataset_name( @functools.lru_cache(maxsize=None) def _get_datasets_worksheet(self) -> pd.DataFrame: - df = pd.read_excel( + return pd.read_excel( self.dataset_path, sheet_name=ExcelDataSheets.DATASETS_SHEET_NAME.value, na_values=[""], keep_default_na=False, ) - df.columns = df.columns.str.strip() - return df @cached_dataset(DatasetTypes.RAW_METADATA.value) def get_raw_dataset_metadata( @@ -173,20 +176,22 @@ def get_variables_metadata(self, dataset_name: str, **params) -> DatasetInterfac na_values=[""], keep_default_na=False, ) - row0 = [v.strip() for v in dataframe.iloc[0].tolist()] metadata_to_return: VariableMetadataContainer = VariableMetadataContainer( { - "variable_names": row0, + "variable_names": dataframe.iloc[0].tolist(), "variable_labels": dataframe.iloc[1].tolist(), "variable_formats": [""] * dataframe.shape[1], "variable_name_to_label_map": dict( - zip(row0, dataframe.iloc[1].tolist()) + zip(dataframe.iloc[0].tolist(), dataframe.iloc[1].tolist()) ), "variable_name_to_data_type_map": dict( - zip(row0, dataframe.iloc[2].tolist()) + zip(dataframe.iloc[0].tolist(), dataframe.iloc[2].tolist()) ), "variable_name_to_size_map": dict( - zip(row0, dataframe.iloc[3].tolist()) + zip( + dataframe.iloc[0].tolist(), + dataframe.iloc[3].tolist(), + ) ), "number_of_variables": dataframe.shape[1], } @@ -221,7 +226,6 @@ def get_datasets(self) -> List[dict]: na_values=[""], keep_default_na=False, ) - worksheet.columns = worksheet.columns.str.strip() except ExcelTestDataError: raise except Exception as e: diff --git a/tests/unit/test_json_reader.py b/tests/unit/test_json_reader.py index 39154c616..767d9c858 100644 --- a/tests/unit/test_json_reader.py +++ b/tests/unit/test_json_reader.py @@ -1,5 +1,7 @@ import os +import pytest from cdisc_rules_engine.services.data_readers.json_reader import JSONReader +from cdisc_rules_engine.exceptions.custom_exceptions import InvalidJSONFormat def test_json_reader_returns_dict(): @@ -11,11 +13,10 @@ def test_json_reader_returns_dict(): assert "datasets" in json_data -def test_whitespace_stripped_from_record_keys(): +def test_whitespace_from_record_keys(): test_dataset_path = ( f"{os.path.dirname(__file__)}/../resources/Datasets_whitespace.json" ) - json_data = JSONReader(encoding="utf-8").from_file(test_dataset_path) - for dataset in json_data.get("datasets", []): - for key in dataset.get("records", {}).keys(): - assert key == key.strip(), f"Key '{key}' has leading/trailing whitespace" + with pytest.raises(InvalidJSONFormat) as exc_info: + JSONReader(encoding="utf-8").from_file(test_dataset_path) + assert "leading/trailing whitespace" in str(exc_info.value.message) diff --git a/tests/unit/test_services/test_data_service/test_excel_data_service.py b/tests/unit/test_services/test_data_service/test_excel_data_service.py index 8e47ac275..1e6623aad 100644 --- a/tests/unit/test_services/test_data_service/test_excel_data_service.py +++ b/tests/unit/test_services/test_data_service/test_excel_data_service.py @@ -44,7 +44,7 @@ def test_get_dataset(dataset_name): "dataset_name", ("ex.xpt", "lb.xpt", "ds.xpt"), ) -def test_whitespace_get_dataset(dataset_name): +def test_whitespace_get_dataset_raises(dataset_name): dataset_path = ( f"{os.path.dirname(__file__)}/../../../resources/Datasets_whitespace.xlsx" ) @@ -56,10 +56,10 @@ def test_whitespace_get_dataset(dataset_name): dataset_implementation=PandasDataset, dataset_path=dataset_path, ) - data = data_service.get_dataset(dataset_name=dataset_name) - assert isinstance(data, PandasDataset) - assert "DOMAIN" in data.data.columns - assert "DOMAIN " not in data.data.columns + with pytest.raises(ExcelTestDataError) as exc_info: + data_service.get_dataset(dataset_name=dataset_name) + assert "leading/trailing whitespace" in str(exc_info.value.message) + assert any(col in exc_info.value.message for col in ["STUDYID", "DOMAIN", "EXSEQ"]) @pytest.mark.parametrize( @@ -132,28 +132,6 @@ def test_get_variables_metdata(dataset_name): assert key in data -@pytest.mark.parametrize( - "dataset_name", - ("ex.xpt", "lb.xpt", "ds.xpt"), -) -def test_whitespace_removal_get_variables_metadata(dataset_name): - dataset_path = ( - f"{os.path.dirname(__file__)}/../../../resources/Datasets_whitespace.xlsx" - ) - mock_cache = MagicMock() - mock_cache.get_dataset.return_value = None - data_service = ExcelDataService.get_instance( - config=ConfigService(), - cache_service=mock_cache, - dataset_implementation=PandasDataset, - dataset_path=dataset_path, - ) - data = data_service.get_variables_metadata(dataset_name=dataset_name, datasets=[]) - assert isinstance(data, PandasDataset) - assert "DOMAIN" in data.data["variable_name"].values - assert "DOMAIN " not in data.data["variable_name"].values - - def test_na_value_preserved_not_converted_to_nan(): """ Test that 'NA' string values are preserved and not converted to NaN/None. From 61412d41d6986bc880489e89dcc73c8e2c38b207 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 12 Mar 2026 11:06:22 -0400 Subject: [PATCH 5/5] tests --- tests/unit/test_json_reader.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_json_reader.py b/tests/unit/test_json_reader.py index 767d9c858..961505ae9 100644 --- a/tests/unit/test_json_reader.py +++ b/tests/unit/test_json_reader.py @@ -4,13 +4,13 @@ from cdisc_rules_engine.exceptions.custom_exceptions import InvalidJSONFormat -def test_json_reader_returns_dict(): +def test_json_reader_whitespace_error(): test_dataset_path = ( f"{os.path.dirname(__file__)}/../resources/Datasets_whitespace.json" ) - json_data = JSONReader(encoding="utf-8").from_file(test_dataset_path) - assert isinstance(json_data, dict) - assert "datasets" in json_data + with pytest.raises(InvalidJSONFormat) as exc_info: + JSONReader(encoding="utf-8").from_file(test_dataset_path) + assert "leading/trailing whitespace" in str(exc_info.value.message) def test_whitespace_from_record_keys(): @@ -20,3 +20,13 @@ def test_whitespace_from_record_keys(): with pytest.raises(InvalidJSONFormat) as exc_info: JSONReader(encoding="utf-8").from_file(test_dataset_path) assert "leading/trailing whitespace" in str(exc_info.value.message) + + +def test_json_reader_clean_file_returns_dict(): + test_dataset_path = f"{os.path.dirname(__file__)}/../resources/CG0027-positive.json" + json_data = JSONReader(encoding="utf-8").from_file(test_dataset_path) + assert isinstance(json_data, dict) + assert "datasets" in json_data + assert len(json_data["datasets"]) > 0 + assert json_data["datasets"][0]["domain"] == "AE" + assert len(json_data["datasets"][0]["records"]["AESEQ"]) == 2