From 0568b2cf3dd7911c90dd7709219b8654299d6bee Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Fri, 6 Mar 2026 13:39:59 +0100 Subject: [PATCH 1/6] #1248 Draft solution to bypass Excel sheet processing --- .../dataset_builders/base_dataset_builder.py | 3 + .../define_dictionary_version_validator.py | 1 + .../data_services/excel_data_service.py | 5 +- .../data_services/local_data_service.py | 1 + .../utilities/rule_processor.py | 11 +- tests/resources/CoreIssue1248/data.xlsx | Bin 0 -> 10032 bytes tests/resources/CoreIssue1248/define.xml | 146 ++++++++++++++++++ tests/resources/CoreIssue1248/relrec.json | 131 ++++++++++++++++ tests/resources/CoreIssue1248/sample.yml | 33 ++++ 9 files changed, 328 insertions(+), 3 deletions(-) create mode 100644 tests/resources/CoreIssue1248/data.xlsx create mode 100644 tests/resources/CoreIssue1248/define.xml create mode 100644 tests/resources/CoreIssue1248/relrec.json create mode 100644 tests/resources/CoreIssue1248/sample.yml diff --git a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py index ad1bc81c4..1362361e4 100644 --- a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py @@ -2,6 +2,7 @@ from cdisc_rules_engine.models.library_metadata_container import ( LibraryMetadataContainer, ) +from cdisc_rules_engine.services.data_services import ExcelDataService from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( DefineXMLReaderFactory, ) @@ -83,6 +84,8 @@ def get_dataset(self, **kwargs): ) else: # single dataset. the most common case + if isinstance(self.data_service, ExcelDataService): + self.dataset_path = self.dataset_metadata.filename dataset: DatasetInterface = self.build() dataset = tag_source(dataset, self.dataset_metadata) return dataset diff --git a/cdisc_rules_engine/operations/define_dictionary_version_validator.py b/cdisc_rules_engine/operations/define_dictionary_version_validator.py index f1db3e4a2..24ef865fb 100644 --- a/cdisc_rules_engine/operations/define_dictionary_version_validator.py +++ b/cdisc_rules_engine/operations/define_dictionary_version_validator.py @@ -36,6 +36,7 @@ def _execute_operation(self) -> bool: whodrug_path=self.params.whodrug_path, loinc_path=self.params.loinc_path, ) + # will not be possible to read define.xml when directory path is not equal to define path define_contents = self.data_service.get_define_xml_contents( dataset_name=os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) ) diff --git a/cdisc_rules_engine/services/data_services/excel_data_service.py b/cdisc_rules_engine/services/data_services/excel_data_service.py index dbfa60567..15f95f4ee 100644 --- a/cdisc_rules_engine/services/data_services/excel_data_service.py +++ b/cdisc_rules_engine/services/data_services/excel_data_service.py @@ -78,6 +78,7 @@ def get_file_matching_pattern(self, prefix: str, pattern: str) -> str: @cached_dataset(DatasetTypes.CONTENTS.value) def get_dataset(self, dataset_name: str, **params) -> DatasetInterface: + # sometimes, instead of dataset name, SDTMDatasetMetadata.full_path sent dtype_mapping = { "Char": str, "Num": float, @@ -144,7 +145,8 @@ def get_raw_dataset_metadata( os.path.getmtime(self.dataset_path) ).isoformat(), filename=dataset_name, - full_path=dataset_name, + full_path=self.dataset_path, + # full_path=dataset_name, file_size=0, record_count=len(dataset), ) @@ -191,6 +193,7 @@ def get_define_xml_contents(self, dataset_name: str) -> bytes: """ Reads local define xml file as bytes """ + # will not work if define.xml is in different folder from dataset with open(dataset_name, "rb") as f: return f.read() diff --git a/cdisc_rules_engine/services/data_services/local_data_service.py b/cdisc_rules_engine/services/data_services/local_data_service.py index cffb61bd3..218bfbd50 100644 --- a/cdisc_rules_engine/services/data_services/local_data_service.py +++ b/cdisc_rules_engine/services/data_services/local_data_service.py @@ -146,6 +146,7 @@ def get_define_xml_contents(self, dataset_name: str) -> bytes: """ Reads local define xml file as bytes """ + # will not work if define.xml is in different folder from dataset with open(dataset_name, "rb") as f: return f.read() diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index 061979714..88492df16 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -30,6 +30,7 @@ from cdisc_rules_engine.exceptions.custom_exceptions import OperationError from cdisc_rules_engine.operations import operations_factory from cdisc_rules_engine.services import logger +from cdisc_rules_engine.services.data_services import ExcelDataService from cdisc_rules_engine.utilities.data_processor import DataProcessor from cdisc_rules_engine.utilities.utils import ( get_directory_path, @@ -234,11 +235,15 @@ def rule_applies_to_class( excluded_classes = classes.get("Exclude", []) is_included = True is_excluded = False + if isinstance(self.data_service, ExcelDataService): + dataset_name = dataset_metadata.filename + else: + dataset_name = dataset_metadata.full_path if included_classes: if ALL_KEYWORD in included_classes: return True variables = self.data_service.get_variables_metadata( - dataset_name=dataset_metadata.full_path, datasets=datasets + dataset_name=dataset_name, datasets=datasets ).data.variable_name class_name = self.data_service.get_dataset_class( variables, @@ -252,7 +257,7 @@ def rule_applies_to_class( is_included = False if excluded_classes: variables = self.data_service.get_variables_metadata( - dataset_name=dataset_metadata.full_path, datasets=datasets + dataset_name=dataset_name, datasets=datasets ).data.variable_name class_name = self.data_service.get_dataset_class( variables, @@ -479,6 +484,8 @@ def _execute_operation( get_directory_path(operation_params.dataset_path), filename, ) + if isinstance(self.data_service, ExcelDataService): + file_path = domain_details.filename operation_params.dataframe = self.data_service.get_dataset( dataset_name=file_path ) diff --git a/tests/resources/CoreIssue1248/data.xlsx b/tests/resources/CoreIssue1248/data.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c994860b4283d4bf1e3cc29f19196cdc2d36cbff GIT binary patch literal 10032 zcmeHN^;aBgwryO3G_DCwa7ZAyyF0-N1gG)F-95MjcX!u7aQEOY2^u8OxIB`X_wK!! zJL~-cGxbAt_3Ao%t*+Ypoc*2iDaydWVguj-hyVb941kHhx?%(c0N}#`09XJ-Xl)T2 zYX@U%hj*&3w#HyxCKoG9(i~W5nk)eH)BXQF{)3-DPyD!b7YjuXH8Y85xZgKo4zHq54Wj=u4kxOR?!-L3CoWbn5bt z_Jhu7aHa+LWpba1`hYdl*(=ve`bn2V`Jv(>>`F0us@?eZR$mLF!qJcMdJSUGqy-pZ^n0()KN)c~9kUQ`8LU~z`Y_8w(gz}R*+XqaSR@uz3^Pn@p#-Mm`fadZa zowjLhoffjwSqUA28)d;(=TGu`e1rig{zaViYAh7zPmGg&(%!QtalW%RwgfXX{XG9y zl>fm>{L^1Ai3B&DR3-F> zk?{WHbvLxM!uw%=fb3$Ett=c9n}?#oxjZQ8#?BFej@mv^+^%e+2g7LwGINnEA?-@z z^eKw2w5cdtW^j!{eEL+h8aU3RPKbn+PaK5Hm+Y(6C#Sjk{;C3MMp)&0c~Dg&N7jDa zc&hhYV!LWReGB^l(JP))^*%e?A+hCMgA>pLUM&u5~k9a#4s zjPhv%%H(XASFgt<`l+%Y-dc5Rr^D&4Jse2gr2{8}e&HlVt50?6Uqv#<>m`N{0{~DV z008JuPsYWP+1bV(^xnn>^s{=EsV&KGu>d{anLm>Ka1TkBVa9+;XfUJyUO`vE97DI& z)I%Ox+aRJ%_Tbr1>zmkKRiy;#&$zl889BYWa?r&cqfplxpcC3gqzG1*HOXmFn1IYQ z^9V+=*A1DG(Pw0#&h4IW-!^U~sG>V7^K$|e_h7&FeW~6tPdtm=r_ek@W{@eCw$Lv+ z;?t#!o3lEx05sbTx*aYT3s13 z@r5bQ7!~N812mcTBQG>MJ#3E+Tg6$wND&>=>a0ZY_RBtPP50iMf$C*vibC9GH zGmCZFG)y^~={^?ZkE;5n`B|`z25e~h-LS;dKd?jT6&LV>M4{e#&tFn(r?(BcR_Fto zNpONn!wC5k6QGJeDL$5zX&u@z3H6M@Jx(L`ZXEgqe12|58@`vGp%TaQfew=`avFsC zq9E(La^6>y7$uSEL$OEhat`VwwG!i8{JA}s*R90NA60ohdnkK~Es#G}3`Z0sVF}*} zwNPlgxrjdqHN`J9xdKbe1(q%^?O+;JmqWqU@i51=g9?%pRGtA2zBJr6Cei4a40HwW zYh1d7IK!ZcTMV#2tgzr?IB&*+pbi=O; zhjeI%UaSYTt`D;!qQZ7KB3}80@XYEp?r$?cN#tKeBwbu!aP#zw#FLN+0f}Rw){*zG8$n&sGZ>@K`c?V4^I5f-slc^#C{dpKH`VlPqXHN9Zy)!c z{T*|nleK5mpZv+s1qA*RCH|Ry!DhzB4q)bAAFqDq;k5Wc>m?ST;4$0{I_xoSeQylP zu&*+O85&$zg=q?*rl_bA)gBei%`t&=Lo78HER5OJuIvx5WVer_EzDL<2 za^)X3(Hc#LEp2?%{bMXNiG#oeO^3UZ8Xp^39CT%%K#Wev+#JFvb;|~K4KbhirsG|n z$=iV&4@`wiUtka|NIMG%(ki{TYyDogk&1i9FHVcMZFYjUCBW56u+7X%OB>P9;sh1D zfPXf84o+lcqMjPQTMGzPHqmGHgdtHny%i2)qJDdaD6@)`s=)G&sdj#_B+L8kfNg|1 z*S=wF?o2Z!ev%=o+y1SIIqqs6-a*~SgS8CTxo&oPuc-plHJxMv$`MWc2E7`PqG>|p|sia?- zgx}uN>D2I1)62IU6t{7;iiYOf5fTFwYj3FtC*hC2(gcOFu%O#dXPjj(gPxI)R6lY4ZY}Yz zn3p~k`@czJ3rmQ!kxwbi9Od7V@%1k(S4Mo@We*1v^dUj!O-z1YYym&PQX+>YK7){(>?i}ty?&WLX_8IF_K zweQJ5H-4GziiAxRi*c$Pv-mbcmj4)eXoA|mZQKi;YBsr30noAHmONBZ9v>H=B(Jgo zsC5t3RPa8G)QxA8b$dXcyy8pRpok%I$wgkpi(Rv6@*~o`OgHimk{#-$MyN?&@V;4f z!FN3^!gBIgE9B4(X$)88hSEcO30( z!Ly_DrJ$Vf7}_25%RTC*j+fLUelS8zO_1CY)+*%!Ciz1+kL6b=4yEnI7#w@5J|H^w zWPJ#6=qdW3u@_=TgmHlQ&bQ1_a-D;LX?G%oOkrCNMdn4h{`xtg=C+4+Vg`A5`wp5B*S7Y{mlQS*W#++cH!K#=k?ph(gf_2aOO-IQrYzGaI z?Q*)~n1S_N9jt1ooHR&1Srd^AJy(+?>Y&s?`nH2)mJWphs(I|RXN$CFFMNpd0Z7F5 zQ3~zIO~ii6?IjnCXmg`5jA-$;WH2o9u(X$8B>0Vp|JH}rde54{83FO9dqiMF`BLFS zQnJzZ=$!X09EyKjP$8hk5E^8exa?UmWv<$z^RQ_N=Ff+C?X8v{lRZ9*v$8DC_vD?w-A4zwQv>K=u(Dtz#EMJ1+8qAi_ z_@>ycs+;6T#=4^Q7i@-sw@*0bFpgG>T&bW6&E)L!Y(6LSy5z~Qlc)0%xle~9eAXRZrL9_>$+_Y^qkKI-d$R;>O& z#D=gzx_qSqU`zfi31(^;&zd`e_(PZa{*Vbj_NrYgB5)b?HVg-IL<_)F3O0LtKNBDp z2Z}vibMaU8Tt=FM+&X77df%02+Bc?ChL@HWPx>V`-RAHtL z)AYfQwqoZ)kJ^O=Qe;-EL@?&MONL;~8NMFGH)Y-6rIbg<07;4)Sz;@jJ4j*<3=fF| zm92WdUHMKMeqrC|=Vy&M(IT}V6X^PaxCNz!96zrUsVEHbAtA$9lFTTp35_JOg-Xsa z97^lN23yxQ$91~Y=dD3oRTtiWtiZ%dMzXg!a}B_sj`4IN?4{Av;EH3Lt$uf+LZh_3 zY}^GeVDgQK{Fn@#W+nLu0hVa5T?g^VzidB#2s%a~F_|g};bZdgR@fNuEgejWO^B=T zluW|CUjYJ%*7Wy%4>${J=R1Dx)zJ`$6?a?KYOC#|(CAUFu^V1DsD{tfGxks+_WQ#z zmHS+)2Sixcf_1>DnP0sWVzl$#Q=EnmgFY;(Nvjh4F{VJoOqB?!K6Tl0759}6t~Lw| z$9Qy&-D~Q0N^<_tNoZWin|esvSr;UY=UJaYd1cBBHU7JkawnAG?T>P2d=}-@m$}U@ zwv_;>5P8ApZAn2(JD@n8TWJ*F@%uz=QqYX#OF9Q_;VoO~) zR4bJ#^<$gV*n(DZI2y0HswVcEd^3;l4Tu71GKP4+_5%(MhqiUh3!LHN_Hh6Q!H#h8 z$<{IzJi#v|l9CpQj220=6H=vw>AUge?3B`P3&t+ zY*A1}m)2}Q&3U;BUlcQ8zC=)e!W2i5WHSAnXJY z5mgg$<;e-h$6*a)`o^Ubx*HVPveYBkO5cY9wSuQ ziFAS-<0xdwB(h3;Q`gk1s#T-j-cO^FUTvc^>iNwjBD)vt=45c(8WoOCK4v?+5hAYV z^@1G}(McFDZZ9E5MVhfV_YoHJ?u`;(2%R21zUp1jv6_}pqtYI{P%b;7gm;Iy;sLvNnLK0`d+Y7t-zs#P${3sKDR0Ip)*ko37S?y6|NYLz&Yjp+t#V`14zZ% zVFSF^rFnIPZ_LgQr5@<6QkUbAv{e$C8MKc;IX^V5Ihk=#4NSQtm;yw%o)=w0PMBD7o}#4`%OPI8skeSx zt&TyhXk(yhJcnN#;nhD4rLK663g{GQf=MA!r{Gh z$EixS?4UUA4g8^74UL9)M_}s-?u)^(#Lqv588!ya{KMyYAi^@)g}W52=h={wXkiX{ zYVB++HX`=f(r-39F}7KF6gHoRD3gXJKVwo39w91E>TOoeUd!#n3`gh-(e*{$3+b~n z)cYItl1`dTv&y^AnJQFsHFq73Kyw3U1fN6j{jbk-3?8TE%HIbFAbAT~1j7Sa$C%^b zhz@X66AGmBs%(RvNJ1J01&Y=Fz^UQxEI?uEgMR~>UXSZRdO=2MdxX_vg9VdxjW9EwS}53 zO^Z=R?n3R%?Nh}{fnN45((J;<{|?#*8TIw9Z_v4h_X;;l6O%@TRK_0ryK^^?NlI#p zb9HcZ-fQY?m(vqZzBSKZ6Wq_1OQ|6#j6O41+$nk|c+_5q?epNA&_)~ZVggnManFk% zw~$^B1zDuOc9Dh5aWslZTdlzL{6Kq5T1jczfhCJ#Yl@d4#kc?hP#?I?lDrq>p*KM; zrLRDSi+v+^KpVf>+*~9=;_lVZfqZBRU0nvH5Qp4Sl!;XwxfhOf^W%7j`O%Y5o*!{KAr+1>L|O>HO%|1ZafN){k;p`B~(X_G2X z+$_K<`vji8gH|u#&iV{lvCx<2k^{_a@Q_G10om0QF>t6YKU)uffdF(l}>Ue4@ z+csHp^8T490}l~T$vSXig964s8fhc>yjcR3Lth7C7+CufDiuOgBY!O?}94F`C4|C4gHpqGIC87Q=~99O9P`YLc|+nc>|lsFEiT>@=$RY zzbXrcTxOkGou7(i)eAMK?*g6sP}cYqIG&%YGH(OARuA*`i>oCd+j~M#R*nezZb!Pe z59*bLvie^Jls*EzAN(e1=GQP1RW`oCOZb{7>F2oj2~E6!9vAJLSfE7#y}((NGN_py zi}*xyf;NwoIZfC^-vp3Htt;{oFL_I{B;O>hOtr+2oG?0nq%`ZtV%aa4zQ)YMU8-j6 zcnOwQqeL&|)Xroz8l+}o!sW2@((}fO8SpQrsQ|%^M-aA7fz2G*2oenM5eUyB1Jv!@ zQ(@||!zNSw@7K5Lg0Z3`wJ_CNx4sbU-Ktm2e;rDm!Eup#7`b43xzsN;8A{S`rE%cc z4=;MrJyJzJ%DlY}>}jlcd?^E^KEa>|zh&*3{NMz))(kE6HF9LGLl45;wi;Xy79BVj zB$kviFumH~@1fJPS?S){t~7m2niP0?b^qG!+_FUYM*sA3F`oKX7yw!*BO60SdmCFY zv!RW>@z0D+nnU&9Cg;6s}7I zMH5iGS~HCt`BkzGXc@Yuop5mcp_E(6RXB@*`UHJ?hl1ctvEb0n0&q(3m7}(0^VPQY zXI5cc4UzR&gvhARUm`^v#HC-6e@_A-_+aB}P3Zgev*ljxqpsII*6J%#%a?a`_9c(m z6?BX(xPY{WUHU<#;Pa?jrAMGCMpFM=w4YMfw_&7H=pvcBgAmBi znbf~hFl?YEBJ_!Zz$eoH1W^4!K?7Uce=v~jzYP5U=$EEkE#J)oT#2~~9^i4*0tzP> z2801&FcY-qKY+~{F_&thij4D~d3O(Vt({9Wv00>c`Se^Itq!;D42KSjsZqZPFiRWN z=t!!8EpuwFu8rW6wUKAiz*Jh$Nf;CEO9U?D^YUYEt@FaN0kLYTUKxT#1-5zjxm4*c zLK=FonN*RBQCN?{y3CrKZt|hSJ)#SCh;b3wgYSBdQ8W)Y%T7BAny2O3uQ+wxkQv}c z(|o*;;FDhCv}uQhzNLzgWO}A14)j<9HfE)$MI<08g%Jk7F>@db{g#mUN`EjeYyxFa zqufsk*Ia`F6pqT|*+V*48U@)0-CtCD-CBO$*R{oL*oJrN*n1DAMj(Rn6)`ZJfG6K% z+5e)CblQ$z>f=qdW@H9#w5!0Gs%3G)I;w%Ef!ST{GECwR`C)i9pr*G#WxSltW0<++ zpUF)KxAcjGz1OG$di%Vz;jW#`+Wc#_rK~vE*fiSaO$U6B zPdjY$f82(GW_+rXf4{5f_t*G0dyEui{tEEdoezHoNPiN{pY}og9{Bq-_>a)8r=HSp zv*F)^|2hTxBNPBYc=GlCekS%i&hImkKahl;8q@!7Lh?Jx@6GH#P{z>y7UfrK`*)Pz z+i8EG#A5zJ`MtUJJHYQtn?C>~2z~+lvC#QF^mh~WM=1XDe}w*SseVWJYo-1p9sq#6 s0091_ZvP(s*DU+za2K*ah5s|>D$2k;apdP#n`eN&Cu^We`SaWV0K_dt5C8xG literal 0 HcmV?d00001 diff --git a/tests/resources/CoreIssue1248/define.xml b/tests/resources/CoreIssue1248/define.xml new file mode 100644 index 000000000..50bc56b7b --- /dev/null +++ b/tests/resources/CoreIssue1248/define.xml @@ -0,0 +1,146 @@ + + + + + CDISCPILOT01 + Study Data Tabulation Model Metadata Submission Guidelines Sample Study + CDISCPILOT01 + + + + + + + + + + + + + + Related Records + + + + + + + + + + + relrec.xpt + + + + + Study Identifier + + + + + + Related Domain Abbreviation + + + + + + + Unique Subject Identifier + + + + + + Identifying Variable + + + + + + Identifying Variable Value + + + + + + Relationship Type + + + + + + + Relationship Identifier + + + + + + + Adverse Events + + + + + + Disposition + + + + + + Death Details + + + + + + Findings About Events or Interventions + + + + + + + + + Many + + + + + + One + + + + + + + Annotated CRF + + + Reviewers Guide + + + + \ No newline at end of file diff --git a/tests/resources/CoreIssue1248/relrec.json b/tests/resources/CoreIssue1248/relrec.json new file mode 100644 index 000000000..586ddece5 --- /dev/null +++ b/tests/resources/CoreIssue1248/relrec.json @@ -0,0 +1,131 @@ +{ + "datasetJSONCreationDateTime": "2024-11-11T15:09:17", + "datasetJSONVersion": "1.1.0", + "fileOID": "www.cdisc.org/StudyMSGv2/1/Define-XML_2.1.0/2024-11-11/relrec", + "dbLastModifiedDateTime": "2020-08-21T09:14:25", + "originator": "CDISC SDTM MSG Team", + "sourceSystem": { + "name": "SAS on X64_10PRO", + "version": "9.0401M7" + }, + "studyOID": "cdisc.com/CDISCPILOT01", + "metaDataVersionOID": "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7", + "metaDataRef": "define.xml", + "itemGroupOID": "IG.RELREC", + "records": 6, + "name": "RELREC", + "label": "Related Records", + "columns": [ + { + "itemOID": "IT.RELREC.STUDYID", + "name": "STUDYID", + "label": "Study Identifier", + "dataType": "string", + "length": 12, + "keySequence": 1 + }, + { + "itemOID": "IT.RELREC.RDOMAIN", + "name": "RDOMAIN", + "label": "Related Domain Abbreviation", + "dataType": "string", + "length": 6, + "keySequence": 2 + }, + { + "itemOID": "IT.RELREC.USUBJID", + "name": "USUBJID", + "label": "Unique Subject Identifier", + "dataType": "string", + "length": 8, + "keySequence": 3 + }, + { + "itemOID": "IT.RELREC.IDVAR", + "name": "IDVAR", + "label": "Identifying Variable", + "dataType": "string", + "length": 200, + "keySequence": 4 + }, + { + "itemOID": "IT.RELREC.IDVARVAL", + "name": "IDVARVAL", + "label": "Identifying Variable Value", + "dataType": "string", + "length": 200, + "keySequence": 5 + }, + { + "itemOID": "IT.RELREC.RELTYPE", + "name": "RELTYPE", + "label": "Relationship Type", + "dataType": "string", + "length": 4 + }, + { + "itemOID": "IT.RELREC.RELID", + "name": "RELID", + "label": "Relationship Identifier", + "dataType": "string", + "length": 200, + "keySequence": 6 + } + ], + "rows": [ + [ + "CDISCPILOT01", + "AE", + "", + "AELNKID", + "", + "ONE", + "AEDS" + ], + [ + "CDISCPILOT01", + "DS", + "", + "DSLNKID", + "", + "ONE", + "AEDS" + ], + [ + "CDISCPILOT01", + "AE", + "", + "AELNKID", + "", + "ONE", + "AEDD" + ], + [ + "CDISCPILOT01", + "DD", + "", + "DDLNKID", + "", + "ONE", + "AEDD" + ], + [ + "CDISCPILOT01", + "AE", + "", + "AELNKID", + "", + "ONE", + "AEFA" + ], + [ + "CDISCPILOT01", + "FA", + "", + "FALNKGRP", + "", + "MANY", + "AEFA" + ] + ] +} \ No newline at end of file diff --git a/tests/resources/CoreIssue1248/sample.yml b/tests/resources/CoreIssue1248/sample.yml new file mode 100644 index 000000000..5b57e1672 --- /dev/null +++ b/tests/resources/CoreIssue1248/sample.yml @@ -0,0 +1,33 @@ +custom_id: SD1129 +Authorities: + - Organization: TEST + Standards: + - Name: SDTMIG + Version: '3.2' + Category: SD1129 +Check: + all: + - name: RELTYPE + operator: non_empty +Operations: + - id: $test + operator: define_variable_metadata + name: RELID + attribute_name: define_variable_label +Core: + Id: SD1129 + Status: Published + Version: 1 +Description: TEST +Executability: Fully Executable +Outcome: + Message: TEST +Rule Type: Record Data +Scope: + Classes: + Include: + - RELATIONSHIP + Domains: + Include: + - RELREC +Sensitivity: Record \ No newline at end of file From 1343804ceb1db659580e829d3112d633167ee1b7 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 9 Mar 2026 11:52:40 +0100 Subject: [PATCH 2/6] #1248 added define_xml_path parameter to operations --- .../dataset_builders/base_dataset_builder.py | 3 --- cdisc_rules_engine/models/operation_params.py | 1 + .../operations/define_variable_metadata.py | 4 +--- cdisc_rules_engine/rules_engine.py | 1 + .../services/data_services/excel_data_service.py | 2 -- .../services/data_services/local_data_service.py | 1 - cdisc_rules_engine/utilities/rule_processor.py | 9 ++------- .../CoreIssue1248/{ => define_subfolder}/data.xlsx | Bin 8 files changed, 5 insertions(+), 16 deletions(-) rename tests/resources/CoreIssue1248/{ => define_subfolder}/data.xlsx (100%) diff --git a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py index 1362361e4..ad1bc81c4 100644 --- a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py @@ -2,7 +2,6 @@ from cdisc_rules_engine.models.library_metadata_container import ( LibraryMetadataContainer, ) -from cdisc_rules_engine.services.data_services import ExcelDataService from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( DefineXMLReaderFactory, ) @@ -84,8 +83,6 @@ def get_dataset(self, **kwargs): ) else: # single dataset. the most common case - if isinstance(self.data_service, ExcelDataService): - self.dataset_path = self.dataset_metadata.filename dataset: DatasetInterface = self.build() dataset = tag_source(dataset, self.dataset_metadata) return dataset diff --git a/cdisc_rules_engine/models/operation_params.py b/cdisc_rules_engine/models/operation_params.py index 7174f09e9..3cc8b938a 100644 --- a/cdisc_rules_engine/models/operation_params.py +++ b/cdisc_rules_engine/models/operation_params.py @@ -62,3 +62,4 @@ class OperationParams: value_is_reference: bool = False namespace: str = None delimiter: str = None + define_xml_path: str = None diff --git a/cdisc_rules_engine/operations/define_variable_metadata.py b/cdisc_rules_engine/operations/define_variable_metadata.py index ab5e73eea..eb1aa7c10 100644 --- a/cdisc_rules_engine/operations/define_variable_metadata.py +++ b/cdisc_rules_engine/operations/define_variable_metadata.py @@ -1,9 +1,7 @@ -from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( DefineXMLReaderFactory, ) from .base_operation import BaseOperation -import os class DefineVariableMetadata(BaseOperation): @@ -33,7 +31,7 @@ def _execute_operation(self): } """ define_contents = self.data_service.get_define_xml_contents( - dataset_name=os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) + dataset_name=self.params.define_xml_path ) define_reader = DefineXMLReaderFactory.from_file_contents(define_contents) variables_metadata = define_reader.extract_variables_metadata( diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 50f36b009..c07ea3ab2 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -446,6 +446,7 @@ def execute_rule( standard_substandard=self.standard_substandard, external_dictionaries=self.external_dictionaries, ct_packages=ct_packages, + define_xml_path=self.define_xml_path, ) dataset_variable = DatasetVariable( dataset, diff --git a/cdisc_rules_engine/services/data_services/excel_data_service.py b/cdisc_rules_engine/services/data_services/excel_data_service.py index 15f95f4ee..c304a6979 100644 --- a/cdisc_rules_engine/services/data_services/excel_data_service.py +++ b/cdisc_rules_engine/services/data_services/excel_data_service.py @@ -78,7 +78,6 @@ def get_file_matching_pattern(self, prefix: str, pattern: str) -> str: @cached_dataset(DatasetTypes.CONTENTS.value) def get_dataset(self, dataset_name: str, **params) -> DatasetInterface: - # sometimes, instead of dataset name, SDTMDatasetMetadata.full_path sent dtype_mapping = { "Char": str, "Num": float, @@ -193,7 +192,6 @@ def get_define_xml_contents(self, dataset_name: str) -> bytes: """ Reads local define xml file as bytes """ - # will not work if define.xml is in different folder from dataset with open(dataset_name, "rb") as f: return f.read() diff --git a/cdisc_rules_engine/services/data_services/local_data_service.py b/cdisc_rules_engine/services/data_services/local_data_service.py index 218bfbd50..cffb61bd3 100644 --- a/cdisc_rules_engine/services/data_services/local_data_service.py +++ b/cdisc_rules_engine/services/data_services/local_data_service.py @@ -146,7 +146,6 @@ def get_define_xml_contents(self, dataset_name: str) -> bytes: """ Reads local define xml file as bytes """ - # will not work if define.xml is in different folder from dataset with open(dataset_name, "rb") as f: return f.read() diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index 88492df16..6edd8d223 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -30,7 +30,6 @@ from cdisc_rules_engine.exceptions.custom_exceptions import OperationError from cdisc_rules_engine.operations import operations_factory from cdisc_rules_engine.services import logger -from cdisc_rules_engine.services.data_services import ExcelDataService from cdisc_rules_engine.utilities.data_processor import DataProcessor from cdisc_rules_engine.utilities.utils import ( get_directory_path, @@ -235,10 +234,7 @@ def rule_applies_to_class( excluded_classes = classes.get("Exclude", []) is_included = True is_excluded = False - if isinstance(self.data_service, ExcelDataService): - dataset_name = dataset_metadata.filename - else: - dataset_name = dataset_metadata.full_path + dataset_name = dataset_metadata.full_path if included_classes: if ALL_KEYWORD in included_classes: return True @@ -372,6 +368,7 @@ def perform_rule_operations( for ct_package_type in operation.get("ct_package_types", []) ], ct_version=operation.get("version"), + define_xml_path=kwargs.get("define_xml_path"), dataframe=dataset_copy, dataset_path=dataset_path, datasets=datasets, @@ -484,8 +481,6 @@ def _execute_operation( get_directory_path(operation_params.dataset_path), filename, ) - if isinstance(self.data_service, ExcelDataService): - file_path = domain_details.filename operation_params.dataframe = self.data_service.get_dataset( dataset_name=file_path ) diff --git a/tests/resources/CoreIssue1248/data.xlsx b/tests/resources/CoreIssue1248/define_subfolder/data.xlsx similarity index 100% rename from tests/resources/CoreIssue1248/data.xlsx rename to tests/resources/CoreIssue1248/define_subfolder/data.xlsx From 4b8fe99a250d03b1d34bc7e830905538e6c64a2d Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 9 Mar 2026 15:29:27 +0100 Subject: [PATCH 3/6] #1248 regression test and fixed when no define path provided to operation --- .../define_dictionary_version_validator.py | 10 +++- .../operations/define_variable_metadata.py | 12 +++- .../test_Issues/test_CoreIssue1248.py | 54 ++++++++++++++++++ .../{define_subfolder => }/data.xlsx | Bin .../{ => define_subfolder}/define.xml | 0 5 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 tests/QARegressionTests/test_Issues/test_CoreIssue1248.py rename tests/resources/CoreIssue1248/{define_subfolder => }/data.xlsx (100%) rename tests/resources/CoreIssue1248/{ => define_subfolder}/define.xml (100%) diff --git a/cdisc_rules_engine/operations/define_dictionary_version_validator.py b/cdisc_rules_engine/operations/define_dictionary_version_validator.py index 24ef865fb..afc2294dc 100644 --- a/cdisc_rules_engine/operations/define_dictionary_version_validator.py +++ b/cdisc_rules_engine/operations/define_dictionary_version_validator.py @@ -36,9 +36,15 @@ def _execute_operation(self) -> bool: whodrug_path=self.params.whodrug_path, loinc_path=self.params.loinc_path, ) - # will not be possible to read define.xml when directory path is not equal to define path + define_path = ( + self.params.define_xml_path + if self.params.define_xml_path + else os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) + ) + if not os.path.exists(define_path): + raise FileNotFoundError("Define XML file %s not found", define_path) define_contents = self.data_service.get_define_xml_contents( - dataset_name=os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) + dataset_name=define_path ) define_reader = DefineXMLReaderFactory.from_file_contents(define_contents) define_dictionary_version = define_reader.get_external_dictionary_version( diff --git a/cdisc_rules_engine/operations/define_variable_metadata.py b/cdisc_rules_engine/operations/define_variable_metadata.py index eb1aa7c10..0d697f5d5 100644 --- a/cdisc_rules_engine/operations/define_variable_metadata.py +++ b/cdisc_rules_engine/operations/define_variable_metadata.py @@ -1,7 +1,10 @@ +import os.path + from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( DefineXMLReaderFactory, ) from .base_operation import BaseOperation +from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME class DefineVariableMetadata(BaseOperation): @@ -30,8 +33,15 @@ def _execute_operation(self): ... } """ + define_path = ( + self.params.define_xml_path + if self.params.define_xml_path + else os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) + ) + if not os.path.exists(define_path): + raise FileNotFoundError("Define XML file %s not found", define_path) define_contents = self.data_service.get_define_xml_contents( - dataset_name=self.params.define_xml_path + dataset_name=define_path ) define_reader = DefineXMLReaderFactory.from_file_contents(define_contents) variables_metadata = define_reader.extract_variables_metadata( diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py b/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py new file mode 100644 index 000000000..1c429edbe --- /dev/null +++ b/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py @@ -0,0 +1,54 @@ +import os +import subprocess +import unittest + +import pytest +import json +from conftest import get_python_executable + + +@pytest.mark.regression +class TestCoreIssue1248(unittest.TestCase): + def test_define_path_used(self): + command = [ + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "sdtmig", + "-v", + "3-2", + "-of", + "JSON", + "-lr", + "tests/resources/CoreIssue1248/sample.yml", + "-cs", + "-dxp", + "tests/resources/CoreIssue1248/define_subfolder/define.xml", + "-dp", + "tests/resources/CoreIssue1248/relrec.json", + ] + subprocess.run(command, check=True) + + # Get the latest created report file + files = os.listdir() + json_files = [ + file + for file in files + if file.startswith("CORE-Report-") and file.endswith(".json") + ] + json_report_path = sorted(json_files)[-1] + # Open the JSON report file + json_report = json.load(open(json_report_path)) + assert { + "Conformance_Details", + "Dataset_Details", + "Issue_Summary", + "Issue_Details", + "Rules_Report", + }.issubset(json_report.keys()) + assert len(json_report["Issue_Details"]) == 6 + assert json_report["Rules_Report"][0]["status"] == "ISSUE REPORTED" + if os.path.exists(json_report_path): + os.remove(json_report_path) diff --git a/tests/resources/CoreIssue1248/define_subfolder/data.xlsx b/tests/resources/CoreIssue1248/data.xlsx similarity index 100% rename from tests/resources/CoreIssue1248/define_subfolder/data.xlsx rename to tests/resources/CoreIssue1248/data.xlsx diff --git a/tests/resources/CoreIssue1248/define.xml b/tests/resources/CoreIssue1248/define_subfolder/define.xml similarity index 100% rename from tests/resources/CoreIssue1248/define.xml rename to tests/resources/CoreIssue1248/define_subfolder/define.xml From 3d5eb7d0c0a64bfa060cd1cb22f9f0e2def16e8d Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 9 Mar 2026 15:47:34 +0100 Subject: [PATCH 4/6] #1248 prettier --- tests/resources/CoreIssue1248/relrec.json | 62 +++-------------------- tests/resources/CoreIssue1248/sample.yml | 4 +- 2 files changed, 9 insertions(+), 57 deletions(-) diff --git a/tests/resources/CoreIssue1248/relrec.json b/tests/resources/CoreIssue1248/relrec.json index 586ddece5..e26b186cc 100644 --- a/tests/resources/CoreIssue1248/relrec.json +++ b/tests/resources/CoreIssue1248/relrec.json @@ -73,59 +73,11 @@ } ], "rows": [ - [ - "CDISCPILOT01", - "AE", - "", - "AELNKID", - "", - "ONE", - "AEDS" - ], - [ - "CDISCPILOT01", - "DS", - "", - "DSLNKID", - "", - "ONE", - "AEDS" - ], - [ - "CDISCPILOT01", - "AE", - "", - "AELNKID", - "", - "ONE", - "AEDD" - ], - [ - "CDISCPILOT01", - "DD", - "", - "DDLNKID", - "", - "ONE", - "AEDD" - ], - [ - "CDISCPILOT01", - "AE", - "", - "AELNKID", - "", - "ONE", - "AEFA" - ], - [ - "CDISCPILOT01", - "FA", - "", - "FALNKGRP", - "", - "MANY", - "AEFA" - ] + ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEDS"], + ["CDISCPILOT01", "DS", "", "DSLNKID", "", "ONE", "AEDS"], + ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEDD"], + ["CDISCPILOT01", "DD", "", "DDLNKID", "", "ONE", "AEDD"], + ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEFA"], + ["CDISCPILOT01", "FA", "", "FALNKGRP", "", "MANY", "AEFA"] ] -} \ No newline at end of file +} diff --git a/tests/resources/CoreIssue1248/sample.yml b/tests/resources/CoreIssue1248/sample.yml index 5b57e1672..529298774 100644 --- a/tests/resources/CoreIssue1248/sample.yml +++ b/tests/resources/CoreIssue1248/sample.yml @@ -3,7 +3,7 @@ Authorities: - Organization: TEST Standards: - Name: SDTMIG - Version: '3.2' + Version: "3.2" Category: SD1129 Check: all: @@ -30,4 +30,4 @@ Scope: Domains: Include: - RELREC -Sensitivity: Record \ No newline at end of file +Sensitivity: Record From 8e8d7614b750583d178bcfe8d82b3d3e2cba1696 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 11 Mar 2026 14:51:35 +0100 Subject: [PATCH 5/6] #1248 rolled back changes in excel_data_service. additional tests. --- .../define_dictionary_version_validator.py | 2 +- .../operations/define_variable_metadata.py | 2 +- .../data_services/excel_data_service.py | 3 +- .../test_Issues/test_CoreIssue1248.py | 180 +++++++++++++++--- .../CoreIssue1248/data_and_define/data.xlsx | Bin 0 -> 10032 bytes .../CoreIssue1248/data_and_define/define.xml | 146 ++++++++++++++ 6 files changed, 305 insertions(+), 28 deletions(-) create mode 100644 tests/resources/CoreIssue1248/data_and_define/data.xlsx create mode 100644 tests/resources/CoreIssue1248/data_and_define/define.xml diff --git a/cdisc_rules_engine/operations/define_dictionary_version_validator.py b/cdisc_rules_engine/operations/define_dictionary_version_validator.py index afc2294dc..ba0506e08 100644 --- a/cdisc_rules_engine/operations/define_dictionary_version_validator.py +++ b/cdisc_rules_engine/operations/define_dictionary_version_validator.py @@ -42,7 +42,7 @@ def _execute_operation(self) -> bool: else os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) ) if not os.path.exists(define_path): - raise FileNotFoundError("Define XML file %s not found", define_path) + raise FileNotFoundError(f"Define XML file {define_path} not found") define_contents = self.data_service.get_define_xml_contents( dataset_name=define_path ) diff --git a/cdisc_rules_engine/operations/define_variable_metadata.py b/cdisc_rules_engine/operations/define_variable_metadata.py index 0d697f5d5..a196f3cb3 100644 --- a/cdisc_rules_engine/operations/define_variable_metadata.py +++ b/cdisc_rules_engine/operations/define_variable_metadata.py @@ -39,7 +39,7 @@ def _execute_operation(self): else os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) ) if not os.path.exists(define_path): - raise FileNotFoundError("Define XML file %s not found", define_path) + raise FileNotFoundError(f"Define XML file {define_path} not found") define_contents = self.data_service.get_define_xml_contents( dataset_name=define_path ) diff --git a/cdisc_rules_engine/services/data_services/excel_data_service.py b/cdisc_rules_engine/services/data_services/excel_data_service.py index c304a6979..dbfa60567 100644 --- a/cdisc_rules_engine/services/data_services/excel_data_service.py +++ b/cdisc_rules_engine/services/data_services/excel_data_service.py @@ -144,8 +144,7 @@ def get_raw_dataset_metadata( os.path.getmtime(self.dataset_path) ).isoformat(), filename=dataset_name, - full_path=self.dataset_path, - # full_path=dataset_name, + full_path=dataset_name, file_size=0, record_count=len(dataset), ) diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py b/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py index 1c429edbe..b5b60572d 100644 --- a/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py +++ b/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py @@ -1,6 +1,5 @@ import os import subprocess -import unittest import pytest import json @@ -8,27 +7,160 @@ @pytest.mark.regression -class TestCoreIssue1248(unittest.TestCase): - def test_define_path_used(self): - command = [ - f"{get_python_executable()}", - "-m", - "core", - "validate", - "-s", - "sdtmig", - "-v", - "3-2", - "-of", - "JSON", - "-lr", - "tests/resources/CoreIssue1248/sample.yml", - "-cs", - "-dxp", - "tests/resources/CoreIssue1248/define_subfolder/define.xml", - "-dp", - "tests/resources/CoreIssue1248/relrec.json", - ] +class TestCoreIssue1248: + @pytest.mark.parametrize( + "command,rules_report,num_issues", + [ + # define path provided will lead to successful execution + ( + [ + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "sdtmig", + "-v", + "3-2", + "-of", + "JSON", + "-lr", + os.path.join("tests", "resources", "CoreIssue1248", "sample.yml"), + "-cs", + "-dxp", + os.path.join( + "tests", + "resources", + "CoreIssue1248", + "define_subfolder", + "define.xml", + ), + "-ps", + "1", + "-dp", + os.path.join("tests", "resources", "CoreIssue1248", "data.xlsx"), + ], + [ + { + "core_id": "SD1129", + "version": "1", + "cdisc_rule_id": "", + "fda_rule_id": "", + "message": "TEST", + "status": "ISSUE REPORTED", + } + ], + 2, + ), + # JSON data file and no define.xml in same folder and no -dxp param will provide error + ( + [ + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "sdtmig", + "-v", + "3-2", + "-of", + "JSON", + "-lr", + os.path.join("tests", "resources", "CoreIssue1248", "sample.yml"), + "-cs", + "-dp", + os.path.join("tests", "resources", "CoreIssue1248", "relrec.json"), + "-ps", + "1", + ], + [ + { + "core_id": "SD1129", + "version": "1", + "cdisc_rule_id": "", + "fda_rule_id": "", + "message": "TEST", + "status": "EXECUTION ERROR", + } + ], + 1, + ), + # no define.xml in same path as data.xlsx file will provide error + ( + [ + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "sdtmig", + "-v", + "3-2", + "-of", + "JSON", + "-lr", + os.path.join("tests", "resources", "CoreIssue1248", "sample.yml"), + "-cs", + "-dp", + os.path.join("tests", "resources", "CoreIssue1248", "data.xlsx"), + "-ps", + "1", + ], + [ + { + "core_id": "SD1129", + "version": "1", + "cdisc_rule_id": "", + "fda_rule_id": "", + "message": "TEST", + "status": "EXECUTION ERROR", + } + ], + 1, + ), + # define.xml in same folder as the data.xls and no -dxp provided will provide error until + # in ExcelDataService dataset metadata creation full_path=dataset_name + ( + [ + f"{get_python_executable()}", + "-m", + "core", + "validate", + "-s", + "sdtmig", + "-v", + "3-2", + "-of", + "JSON", + "-lr", + os.path.join("tests", "resources", "CoreIssue1248", "sample.yml"), + "-cs", + "-dp", + os.path.join( + "tests", + "resources", + "CoreIssue1248", + "data_and_define", + "data.xlsx", + ), + "-ps", + "1", + ], + [ + { + "core_id": "SD1129", + "version": "1", + "cdisc_rule_id": "", + "fda_rule_id": "", + "message": "TEST", + "status": "EXECUTION ERROR", + } + ], + 1, + ), + ], + ) + def test_define_path_used(self, command, rules_report, num_issues): subprocess.run(command, check=True) # Get the latest created report file @@ -48,7 +180,7 @@ def test_define_path_used(self): "Issue_Details", "Rules_Report", }.issubset(json_report.keys()) - assert len(json_report["Issue_Details"]) == 6 - assert json_report["Rules_Report"][0]["status"] == "ISSUE REPORTED" + assert len(json_report["Issue_Details"]) == num_issues + assert json_report["Rules_Report"] == rules_report if os.path.exists(json_report_path): os.remove(json_report_path) diff --git a/tests/resources/CoreIssue1248/data_and_define/data.xlsx b/tests/resources/CoreIssue1248/data_and_define/data.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c994860b4283d4bf1e3cc29f19196cdc2d36cbff GIT binary patch literal 10032 zcmeHN^;aBgwryO3G_DCwa7ZAyyF0-N1gG)F-95MjcX!u7aQEOY2^u8OxIB`X_wK!! zJL~-cGxbAt_3Ao%t*+Ypoc*2iDaydWVguj-hyVb941kHhx?%(c0N}#`09XJ-Xl)T2 zYX@U%hj*&3w#HyxCKoG9(i~W5nk)eH)BXQF{)3-DPyD!b7YjuXH8Y85xZgKo4zHq54Wj=u4kxOR?!-L3CoWbn5bt z_Jhu7aHa+LWpba1`hYdl*(=ve`bn2V`Jv(>>`F0us@?eZR$mLF!qJcMdJSUGqy-pZ^n0()KN)c~9kUQ`8LU~z`Y_8w(gz}R*+XqaSR@uz3^Pn@p#-Mm`fadZa zowjLhoffjwSqUA28)d;(=TGu`e1rig{zaViYAh7zPmGg&(%!QtalW%RwgfXX{XG9y zl>fm>{L^1Ai3B&DR3-F> zk?{WHbvLxM!uw%=fb3$Ett=c9n}?#oxjZQ8#?BFej@mv^+^%e+2g7LwGINnEA?-@z z^eKw2w5cdtW^j!{eEL+h8aU3RPKbn+PaK5Hm+Y(6C#Sjk{;C3MMp)&0c~Dg&N7jDa zc&hhYV!LWReGB^l(JP))^*%e?A+hCMgA>pLUM&u5~k9a#4s zjPhv%%H(XASFgt<`l+%Y-dc5Rr^D&4Jse2gr2{8}e&HlVt50?6Uqv#<>m`N{0{~DV z008JuPsYWP+1bV(^xnn>^s{=EsV&KGu>d{anLm>Ka1TkBVa9+;XfUJyUO`vE97DI& z)I%Ox+aRJ%_Tbr1>zmkKRiy;#&$zl889BYWa?r&cqfplxpcC3gqzG1*HOXmFn1IYQ z^9V+=*A1DG(Pw0#&h4IW-!^U~sG>V7^K$|e_h7&FeW~6tPdtm=r_ek@W{@eCw$Lv+ z;?t#!o3lEx05sbTx*aYT3s13 z@r5bQ7!~N812mcTBQG>MJ#3E+Tg6$wND&>=>a0ZY_RBtPP50iMf$C*vibC9GH zGmCZFG)y^~={^?ZkE;5n`B|`z25e~h-LS;dKd?jT6&LV>M4{e#&tFn(r?(BcR_Fto zNpONn!wC5k6QGJeDL$5zX&u@z3H6M@Jx(L`ZXEgqe12|58@`vGp%TaQfew=`avFsC zq9E(La^6>y7$uSEL$OEhat`VwwG!i8{JA}s*R90NA60ohdnkK~Es#G}3`Z0sVF}*} zwNPlgxrjdqHN`J9xdKbe1(q%^?O+;JmqWqU@i51=g9?%pRGtA2zBJr6Cei4a40HwW zYh1d7IK!ZcTMV#2tgzr?IB&*+pbi=O; zhjeI%UaSYTt`D;!qQZ7KB3}80@XYEp?r$?cN#tKeBwbu!aP#zw#FLN+0f}Rw){*zG8$n&sGZ>@K`c?V4^I5f-slc^#C{dpKH`VlPqXHN9Zy)!c z{T*|nleK5mpZv+s1qA*RCH|Ry!DhzB4q)bAAFqDq;k5Wc>m?ST;4$0{I_xoSeQylP zu&*+O85&$zg=q?*rl_bA)gBei%`t&=Lo78HER5OJuIvx5WVer_EzDL<2 za^)X3(Hc#LEp2?%{bMXNiG#oeO^3UZ8Xp^39CT%%K#Wev+#JFvb;|~K4KbhirsG|n z$=iV&4@`wiUtka|NIMG%(ki{TYyDogk&1i9FHVcMZFYjUCBW56u+7X%OB>P9;sh1D zfPXf84o+lcqMjPQTMGzPHqmGHgdtHny%i2)qJDdaD6@)`s=)G&sdj#_B+L8kfNg|1 z*S=wF?o2Z!ev%=o+y1SIIqqs6-a*~SgS8CTxo&oPuc-plHJxMv$`MWc2E7`PqG>|p|sia?- zgx}uN>D2I1)62IU6t{7;iiYOf5fTFwYj3FtC*hC2(gcOFu%O#dXPjj(gPxI)R6lY4ZY}Yz zn3p~k`@czJ3rmQ!kxwbi9Od7V@%1k(S4Mo@We*1v^dUj!O-z1YYym&PQX+>YK7){(>?i}ty?&WLX_8IF_K zweQJ5H-4GziiAxRi*c$Pv-mbcmj4)eXoA|mZQKi;YBsr30noAHmONBZ9v>H=B(Jgo zsC5t3RPa8G)QxA8b$dXcyy8pRpok%I$wgkpi(Rv6@*~o`OgHimk{#-$MyN?&@V;4f z!FN3^!gBIgE9B4(X$)88hSEcO30( z!Ly_DrJ$Vf7}_25%RTC*j+fLUelS8zO_1CY)+*%!Ciz1+kL6b=4yEnI7#w@5J|H^w zWPJ#6=qdW3u@_=TgmHlQ&bQ1_a-D;LX?G%oOkrCNMdn4h{`xtg=C+4+Vg`A5`wp5B*S7Y{mlQS*W#++cH!K#=k?ph(gf_2aOO-IQrYzGaI z?Q*)~n1S_N9jt1ooHR&1Srd^AJy(+?>Y&s?`nH2)mJWphs(I|RXN$CFFMNpd0Z7F5 zQ3~zIO~ii6?IjnCXmg`5jA-$;WH2o9u(X$8B>0Vp|JH}rde54{83FO9dqiMF`BLFS zQnJzZ=$!X09EyKjP$8hk5E^8exa?UmWv<$z^RQ_N=Ff+C?X8v{lRZ9*v$8DC_vD?w-A4zwQv>K=u(Dtz#EMJ1+8qAi_ z_@>ycs+;6T#=4^Q7i@-sw@*0bFpgG>T&bW6&E)L!Y(6LSy5z~Qlc)0%xle~9eAXRZrL9_>$+_Y^qkKI-d$R;>O& z#D=gzx_qSqU`zfi31(^;&zd`e_(PZa{*Vbj_NrYgB5)b?HVg-IL<_)F3O0LtKNBDp z2Z}vibMaU8Tt=FM+&X77df%02+Bc?ChL@HWPx>V`-RAHtL z)AYfQwqoZ)kJ^O=Qe;-EL@?&MONL;~8NMFGH)Y-6rIbg<07;4)Sz;@jJ4j*<3=fF| zm92WdUHMKMeqrC|=Vy&M(IT}V6X^PaxCNz!96zrUsVEHbAtA$9lFTTp35_JOg-Xsa z97^lN23yxQ$91~Y=dD3oRTtiWtiZ%dMzXg!a}B_sj`4IN?4{Av;EH3Lt$uf+LZh_3 zY}^GeVDgQK{Fn@#W+nLu0hVa5T?g^VzidB#2s%a~F_|g};bZdgR@fNuEgejWO^B=T zluW|CUjYJ%*7Wy%4>${J=R1Dx)zJ`$6?a?KYOC#|(CAUFu^V1DsD{tfGxks+_WQ#z zmHS+)2Sixcf_1>DnP0sWVzl$#Q=EnmgFY;(Nvjh4F{VJoOqB?!K6Tl0759}6t~Lw| z$9Qy&-D~Q0N^<_tNoZWin|esvSr;UY=UJaYd1cBBHU7JkawnAG?T>P2d=}-@m$}U@ zwv_;>5P8ApZAn2(JD@n8TWJ*F@%uz=QqYX#OF9Q_;VoO~) zR4bJ#^<$gV*n(DZI2y0HswVcEd^3;l4Tu71GKP4+_5%(MhqiUh3!LHN_Hh6Q!H#h8 z$<{IzJi#v|l9CpQj220=6H=vw>AUge?3B`P3&t+ zY*A1}m)2}Q&3U;BUlcQ8zC=)e!W2i5WHSAnXJY z5mgg$<;e-h$6*a)`o^Ubx*HVPveYBkO5cY9wSuQ ziFAS-<0xdwB(h3;Q`gk1s#T-j-cO^FUTvc^>iNwjBD)vt=45c(8WoOCK4v?+5hAYV z^@1G}(McFDZZ9E5MVhfV_YoHJ?u`;(2%R21zUp1jv6_}pqtYI{P%b;7gm;Iy;sLvNnLK0`d+Y7t-zs#P${3sKDR0Ip)*ko37S?y6|NYLz&Yjp+t#V`14zZ% zVFSF^rFnIPZ_LgQr5@<6QkUbAv{e$C8MKc;IX^V5Ihk=#4NSQtm;yw%o)=w0PMBD7o}#4`%OPI8skeSx zt&TyhXk(yhJcnN#;nhD4rLK663g{GQf=MA!r{Gh z$EixS?4UUA4g8^74UL9)M_}s-?u)^(#Lqv588!ya{KMyYAi^@)g}W52=h={wXkiX{ zYVB++HX`=f(r-39F}7KF6gHoRD3gXJKVwo39w91E>TOoeUd!#n3`gh-(e*{$3+b~n z)cYItl1`dTv&y^AnJQFsHFq73Kyw3U1fN6j{jbk-3?8TE%HIbFAbAT~1j7Sa$C%^b zhz@X66AGmBs%(RvNJ1J01&Y=Fz^UQxEI?uEgMR~>UXSZRdO=2MdxX_vg9VdxjW9EwS}53 zO^Z=R?n3R%?Nh}{fnN45((J;<{|?#*8TIw9Z_v4h_X;;l6O%@TRK_0ryK^^?NlI#p zb9HcZ-fQY?m(vqZzBSKZ6Wq_1OQ|6#j6O41+$nk|c+_5q?epNA&_)~ZVggnManFk% zw~$^B1zDuOc9Dh5aWslZTdlzL{6Kq5T1jczfhCJ#Yl@d4#kc?hP#?I?lDrq>p*KM; zrLRDSi+v+^KpVf>+*~9=;_lVZfqZBRU0nvH5Qp4Sl!;XwxfhOf^W%7j`O%Y5o*!{KAr+1>L|O>HO%|1ZafN){k;p`B~(X_G2X z+$_K<`vji8gH|u#&iV{lvCx<2k^{_a@Q_G10om0QF>t6YKU)uffdF(l}>Ue4@ z+csHp^8T490}l~T$vSXig964s8fhc>yjcR3Lth7C7+CufDiuOgBY!O?}94F`C4|C4gHpqGIC87Q=~99O9P`YLc|+nc>|lsFEiT>@=$RY zzbXrcTxOkGou7(i)eAMK?*g6sP}cYqIG&%YGH(OARuA*`i>oCd+j~M#R*nezZb!Pe z59*bLvie^Jls*EzAN(e1=GQP1RW`oCOZb{7>F2oj2~E6!9vAJLSfE7#y}((NGN_py zi}*xyf;NwoIZfC^-vp3Htt;{oFL_I{B;O>hOtr+2oG?0nq%`ZtV%aa4zQ)YMU8-j6 zcnOwQqeL&|)Xroz8l+}o!sW2@((}fO8SpQrsQ|%^M-aA7fz2G*2oenM5eUyB1Jv!@ zQ(@||!zNSw@7K5Lg0Z3`wJ_CNx4sbU-Ktm2e;rDm!Eup#7`b43xzsN;8A{S`rE%cc z4=;MrJyJzJ%DlY}>}jlcd?^E^KEa>|zh&*3{NMz))(kE6HF9LGLl45;wi;Xy79BVj zB$kviFumH~@1fJPS?S){t~7m2niP0?b^qG!+_FUYM*sA3F`oKX7yw!*BO60SdmCFY zv!RW>@z0D+nnU&9Cg;6s}7I zMH5iGS~HCt`BkzGXc@Yuop5mcp_E(6RXB@*`UHJ?hl1ctvEb0n0&q(3m7}(0^VPQY zXI5cc4UzR&gvhARUm`^v#HC-6e@_A-_+aB}P3Zgev*ljxqpsII*6J%#%a?a`_9c(m z6?BX(xPY{WUHU<#;Pa?jrAMGCMpFM=w4YMfw_&7H=pvcBgAmBi znbf~hFl?YEBJ_!Zz$eoH1W^4!K?7Uce=v~jzYP5U=$EEkE#J)oT#2~~9^i4*0tzP> z2801&FcY-qKY+~{F_&thij4D~d3O(Vt({9Wv00>c`Se^Itq!;D42KSjsZqZPFiRWN z=t!!8EpuwFu8rW6wUKAiz*Jh$Nf;CEO9U?D^YUYEt@FaN0kLYTUKxT#1-5zjxm4*c zLK=FonN*RBQCN?{y3CrKZt|hSJ)#SCh;b3wgYSBdQ8W)Y%T7BAny2O3uQ+wxkQv}c z(|o*;;FDhCv}uQhzNLzgWO}A14)j<9HfE)$MI<08g%Jk7F>@db{g#mUN`EjeYyxFa zqufsk*Ia`F6pqT|*+V*48U@)0-CtCD-CBO$*R{oL*oJrN*n1DAMj(Rn6)`ZJfG6K% z+5e)CblQ$z>f=qdW@H9#w5!0Gs%3G)I;w%Ef!ST{GECwR`C)i9pr*G#WxSltW0<++ zpUF)KxAcjGz1OG$di%Vz;jW#`+Wc#_rK~vE*fiSaO$U6B zPdjY$f82(GW_+rXf4{5f_t*G0dyEui{tEEdoezHoNPiN{pY}og9{Bq-_>a)8r=HSp zv*F)^|2hTxBNPBYc=GlCekS%i&hImkKahl;8q@!7Lh?Jx@6GH#P{z>y7UfrK`*)Pz z+i8EG#A5zJ`MtUJJHYQtn?C>~2z~+lvC#QF^mh~WM=1XDe}w*SseVWJYo-1p9sq#6 s0091_ZvP(s*DU+za2K*ah5s|>D$2k;apdP#n`eN&Cu^We`SaWV0K_dt5C8xG literal 0 HcmV?d00001 diff --git a/tests/resources/CoreIssue1248/data_and_define/define.xml b/tests/resources/CoreIssue1248/data_and_define/define.xml new file mode 100644 index 000000000..50bc56b7b --- /dev/null +++ b/tests/resources/CoreIssue1248/data_and_define/define.xml @@ -0,0 +1,146 @@ + + + + + CDISCPILOT01 + Study Data Tabulation Model Metadata Submission Guidelines Sample Study + CDISCPILOT01 + + + + + + + + + + + + + + Related Records + + + + + + + + + + + relrec.xpt + + + + + Study Identifier + + + + + + Related Domain Abbreviation + + + + + + + Unique Subject Identifier + + + + + + Identifying Variable + + + + + + Identifying Variable Value + + + + + + Relationship Type + + + + + + + Relationship Identifier + + + + + + + Adverse Events + + + + + + Disposition + + + + + + Death Details + + + + + + Findings About Events or Interventions + + + + + + + + + Many + + + + + + One + + + + + + + Annotated CRF + + + Reviewers Guide + + + + \ No newline at end of file From 7343679fe43f90815154516c2acfae3665547693 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Tue, 17 Mar 2026 18:32:19 +0100 Subject: [PATCH 6/6] #1248 moved define contents read to base operation --- cdisc_rules_engine/operations/base_operation.py | 16 ++++++++++++++++ .../define_dictionary_version_validator.py | 13 +------------ .../operations/define_variable_metadata.py | 14 +------------- 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/cdisc_rules_engine/operations/base_operation.py b/cdisc_rules_engine/operations/base_operation.py index a045a323b..80c088a36 100644 --- a/cdisc_rules_engine/operations/base_operation.py +++ b/cdisc_rules_engine/operations/base_operation.py @@ -1,3 +1,6 @@ +import os + +from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME from cdisc_rules_engine.models.operation_params import OperationParams from cdisc_rules_engine.constants.permissibility import ( PERMISSIBLE, @@ -314,3 +317,16 @@ def _resolve_variable_name(variable_name, domain: str): if "--" in variable_name else variable_name ) + + def _get_define_contents(self): + define_path = ( + self.params.define_xml_path + if self.params.define_xml_path + else os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) + ) + if not os.path.exists(define_path): + raise FileNotFoundError(f"Define XML file {define_path} not found") + define_contents = self.data_service.get_define_xml_contents( + dataset_name=define_path + ) + return define_contents diff --git a/cdisc_rules_engine/operations/define_dictionary_version_validator.py b/cdisc_rules_engine/operations/define_dictionary_version_validator.py index ba0506e08..2b4190896 100644 --- a/cdisc_rules_engine/operations/define_dictionary_version_validator.py +++ b/cdisc_rules_engine/operations/define_dictionary_version_validator.py @@ -1,4 +1,3 @@ -from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME from cdisc_rules_engine.models.external_dictionaries_container import ( DICTIONARY_VALIDATORS, DictionaryTypes, @@ -8,7 +7,6 @@ ) from .base_operation import BaseOperation from cdisc_rules_engine.exceptions.custom_exceptions import UnsupportedDictionaryType -import os class DefineDictionaryVersionValidator(BaseOperation): @@ -36,16 +34,7 @@ def _execute_operation(self) -> bool: whodrug_path=self.params.whodrug_path, loinc_path=self.params.loinc_path, ) - define_path = ( - self.params.define_xml_path - if self.params.define_xml_path - else os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) - ) - if not os.path.exists(define_path): - raise FileNotFoundError(f"Define XML file {define_path} not found") - define_contents = self.data_service.get_define_xml_contents( - dataset_name=define_path - ) + define_contents = self._get_define_contents() define_reader = DefineXMLReaderFactory.from_file_contents(define_contents) define_dictionary_version = define_reader.get_external_dictionary_version( self.params.external_dictionary_type diff --git a/cdisc_rules_engine/operations/define_variable_metadata.py b/cdisc_rules_engine/operations/define_variable_metadata.py index a196f3cb3..c6312cb82 100644 --- a/cdisc_rules_engine/operations/define_variable_metadata.py +++ b/cdisc_rules_engine/operations/define_variable_metadata.py @@ -1,10 +1,7 @@ -import os.path - from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( DefineXMLReaderFactory, ) from .base_operation import BaseOperation -from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME class DefineVariableMetadata(BaseOperation): @@ -33,16 +30,7 @@ def _execute_operation(self): ... } """ - define_path = ( - self.params.define_xml_path - if self.params.define_xml_path - else os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) - ) - if not os.path.exists(define_path): - raise FileNotFoundError(f"Define XML file {define_path} not found") - define_contents = self.data_service.get_define_xml_contents( - dataset_name=define_path - ) + define_contents = self._get_define_contents() define_reader = DefineXMLReaderFactory.from_file_contents(define_contents) variables_metadata = define_reader.extract_variables_metadata( self.params.domain