From 656d11fc77b2cb63d0db7667fb0cbde435c10fe6 Mon Sep 17 00:00:00 2001 From: Taisiia Sherstiukova Date: Wed, 2 Apr 2025 15:54:45 -0600 Subject: [PATCH] changed the sequence and proteinSequence functions to make all sequences upper case no matter the input format --- .DS_Store | Bin 6148 -> 6148 bytes excelutils/.DS_Store | Bin 6148 -> 6148 bytes excelutils/excel_sbol_utils/.DS_Store | Bin 6148 -> 6148 bytes excelutils/excel_sbol_utils/library2.py | 8 ++++---- excelutils/excel_sbol_utils/test.py | 4 ++++ 5 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 excelutils/excel_sbol_utils/test.py diff --git a/.DS_Store b/.DS_Store index f44001aebe3168b95c406d69715458c38c5d1a94..dd6e21a0facfee5ffd04bf2205a4a36c4b03811a 100644 GIT binary patch delta 127 zcmZoMXffEJ#uU5iHvSlev8?Rfy+@BZqROY>l`qIJ r3{K9^Edc6aU{F0UxtZw(Q`N7@vCMUh37a1>tFcThVBO5l@s}R}7NI4c delta 127 zcmZoMXffEJ#uO_f$H2hA!l1{H&XCDalAG`1l9ZF51Qg@Q=QjKBb^9?#RQVLV@&y@& q!O8i#1wcIv3@QSXo0)DfmHeC>%Us78u=ydg8q352*3Il3fB6AB&Lax| diff --git a/excelutils/.DS_Store b/excelutils/.DS_Store index 23ddafc31379b030ea9b62af7816cb661003caac..e71403e653cb92f198f597b88f96d7b593628b3c 100644 GIT binary patch delta 49 zcmZoMXffE3&B7F$G&z^WfP>MmgKPWIV~#+M+T_zLTx<}5$+uZd7!Pa~VErNl0G^@| A7XSbN delta 49 zcmZoMXffE3&B7F(I60TafaCtrt*6&-JLU-Fs7*f2!o>y=n0%YXgmJ@W0oE@<00yNM ASO5S3 diff --git a/excelutils/excel_sbol_utils/.DS_Store b/excelutils/excel_sbol_utils/.DS_Store index 0c1ddf2650ed3e2cf848f4a4217d11380326f19e..274bcf39e195eaa7bb06afb19ed620aa2a46e880 100644 GIT binary patch delta 45 zcmZoMXffDe!pP*#Fqwx@YH|Q04_o_7o20!*Cr@CMnOq0tUd^uf^Wog)35+FT085Jy AdjJ3c delta 45 ucmZoMXffDe!pP)&cQOy7)Z_q09=5GjE^gNjPM*LhGr11V-8_M@L<|60oDcE< diff --git a/excelutils/excel_sbol_utils/library2.py b/excelutils/excel_sbol_utils/library2.py index c064d63..5754ac8 100644 --- a/excelutils/excel_sbol_utils/library2.py +++ b/excelutils/excel_sbol_utils/library2.py @@ -982,9 +982,9 @@ def sequence(rowobj): elif re.match(r'^[a-zA-Z \s*]+$', val): # if a sequence string - # removes spaces, enters, and makes all lower case + # removes spaces, enters, and makes all upper case val = "".join(val.split()) - val = val.replace(u"\ufeff", "").lower() + val = val.replace(u"\ufeff", "").upper() uri = f'{url}/search/sequence={val}&' if data["Domain"].strip() == "": @@ -1042,10 +1042,10 @@ def proteinSequence(rowobj): return rowobj.obj.sequences = [val] - elif re.match(r'^[ACDEFGHIKLMNPQRSTVWY\s*]+$', val): + elif re.match(r'^[ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy\s*]+$', val): # if a sequence string - # removes spaces, enters, and makes all lower case + # removes spaces, enters, and makes all upper case val = "".join(val.split()) # removes * val = val.replace('*', '') diff --git a/excelutils/excel_sbol_utils/test.py b/excelutils/excel_sbol_utils/test.py new file mode 100644 index 0000000..fdf168e --- /dev/null +++ b/excelutils/excel_sbol_utils/test.py @@ -0,0 +1,4 @@ +user_input = " Hello " +cleaned_input = user_input.strip() +print(f"Original input: '{user_input}'") +print(f"Cleaned input: '{cleaned_input}'") \ No newline at end of file