From 3ccb2a51c1dc164f6746871f096d15e1e58fd526 Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Mon, 28 Mar 2022 11:19:37 +0200 Subject: [PATCH 01/16] Added support for different csv encodings in Populate_Metadata.py. Requires omero-py with support for different file encodings in omero.utils.populate_roi.DownloadingOriginalFileProvider --- omero/import_scripts/Populate_Metadata.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py index 168e80632..d127bbe9f 100644 --- a/omero/import_scripts/Populate_Metadata.py +++ b/omero/import_scripts/Populate_Metadata.py @@ -107,7 +107,7 @@ def populate_metadata(client, conn, script_params): object_ids = script_params["IDs"] object_id = object_ids[0] data_type = script_params["Data_Type"] - + encoding = script_params["CSV Encoding"] if data_type == "Image": try: from omero_metadata.populate import ImageWrapper # noqa: F401 @@ -120,7 +120,7 @@ def populate_metadata(client, conn, script_params): original_file = get_original_file( conn, data_type, object_id, file_ann_id) provider = DownloadingOriginalFileProvider(conn) - data_for_preprocessing = provider.get_original_file_data(original_file) + data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding) temp_name = data_for_preprocessing.name # 5.9.1 returns NamedTempFile where name is a string. if isinstance(temp_name, int): @@ -172,6 +172,13 @@ def run_script(): "File_Annotation", grouping="3", description="File Annotation ID containing metadata to populate. " "Note this is not the same as the File ID."), + + scripts.String( + "CSV Encoding", grouping="4", + description="""Encoding of the CSV File provided. Can depend on your system locale + as well as the program used to generate the CSV File. E.g. Excel defaults to machine specific + ANSI encoding during export to CSV (i.e. cp1252 on US machines, iso-8859-1 on german machines ...).""", + default="utf-8"), authors=["Emil Rozbicki", "OME Team"], institutions=["Glencoe Software Inc."], From 10f73a073215785ba9b1e75685523a9365efee8c Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Mon, 28 Mar 2022 11:28:40 +0200 Subject: [PATCH 02/16] Added Test for latin-1 and cp1252 encoding in Populate_Metadata.py --- test/integration/test_import_scripts.py | 104 ++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py index 3cc8b8d2b..fedf046f5 100644 --- a/test/integration/test_import_scripts.py +++ b/test/integration/test_import_scripts.py @@ -124,3 +124,107 @@ def test_populate_metadata_for_screen(self): assert message is not None assert message.getValue().startswith('Table data populated') conn.close() + + def test_populate_metadata_for_cp1252(self): + sid = super(TestImportScripts, self).get_script(populate_metadata) + assert sid > 0 + + client, user = self.new_client_and_user() + update_service = client.getSession().getUpdateService() + plates = self.import_plates(client, plate_cols=3, plate_rows=1) + plate = plates[0] + name = plate.name.val + screen = omero.model.ScreenI() + screen.name = omero.rtypes.rstring("test_for_cp1252") + spl = omero.model.ScreenPlateLinkI() + spl.setParent(screen) + spl.setChild(plate) + spl = update_service.saveAndReturnObject(spl) + screen_id = spl.getParent().id.val + assert screen_id > 0 + assert spl.getChild().id.val == plate.id.val + + cvs_file = create_path("test_cp1252", ".csv") + + # create a file annotation + with open(cvs_file.abspath(), 'wb+') as f: + f.write("Well,Plate, Well Type, Facility-Salt-Batch-ID\n".encode("cp1252")) + f.write(("A01,%s,Treatment,FOOL10041-101-2\n" % name).encode("cp1252")) + f.write(("A02,%s,Control,\n" % name).encode("cp1252")) + f.write(("A03,%s,Treatment,FOOL10041-101-2\n" % name).encode("cp1252")) + + conn = BlitzGateway(client_obj=client) + fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") + assert fa is not None + assert fa.id > 0 + link = omero.model.ScreenAnnotationLinkI() + link.setParent(omero.model.ScreenI(screen_id, False)) + link.setChild(omero.model.FileAnnotationI(fa.id, False)) + link = update_service.saveAndReturnObject(link) + assert link.id.val > 0 + # run the script + screen_ids = [] + screen_ids.append(spl.getParent().id) + + args = { + "Data_Type": omero.rtypes.rstring("Screen"), + "IDs": omero.rtypes.rlist(screen_ids), + "File_Annotation": omero.rtypes.rstring(str(fa.id)), + "CSV Encoding": omero.rtypes.rstring(str("cp1252")) + } + message = run_script(client, sid, args, "Message") + assert message is not None + assert message.getValue().startswith('Table data populated') + conn.close() + + def test_populate_metadata_for_iso8859(self): + sid = super(TestImportScripts, self).get_script(populate_metadata) + assert sid > 0 + + client, user = self.new_client_and_user() + update_service = client.getSession().getUpdateService() + plates = self.import_plates(client, plate_cols=3, plate_rows=1) + plate = plates[0] + name = plate.name.val + screen = omero.model.ScreenI() + screen.name = omero.rtypes.rstring("test_for_iso8859") + spl = omero.model.ScreenPlateLinkI() + spl.setParent(screen) + spl.setChild(plate) + spl = update_service.saveAndReturnObject(spl) + screen_id = spl.getParent().id.val + assert screen_id > 0 + assert spl.getChild().id.val == plate.id.val + + cvs_file = create_path("test_iso8859", ".csv") + + # create a file annotation + with open(cvs_file.abspath(), 'wb+') as f: + f.write("Well,Plate, Well Type, Facility-Salt-Batch-ID\n".encode("iso-8859-1")) + f.write(("A01,%s,Treatment,FOOL10041-101-2\n" % name).encode("iso-8859-1")) + f.write(("A02,%s,Control,\n" % name).encode("iso-8859-1")) + f.write(("A03,%s,Treatment,FOOL10041-101-2\n" % name).encode("iso-8859-1")) + + conn = BlitzGateway(client_obj=client) + fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") + assert fa is not None + assert fa.id > 0 + link = omero.model.ScreenAnnotationLinkI() + link.setParent(omero.model.ScreenI(screen_id, False)) + link.setChild(omero.model.FileAnnotationI(fa.id, False)) + link = update_service.saveAndReturnObject(link) + assert link.id.val > 0 + # run the script + screen_ids = [] + screen_ids.append(spl.getParent().id) + + args = { + "Data_Type": omero.rtypes.rstring("Screen"), + "IDs": omero.rtypes.rlist(screen_ids), + "File_Annotation": omero.rtypes.rstring(str(fa.id)), + "CSV Encoding": omero.rtypes.rstring(str("iso-8859-1")) + } + message = run_script(client, sid, args, "Message") + assert message is not None + assert message.getValue().startswith('Table data populated') + conn.close() From e21a065c0b52c24ad945ee1eb533fa3250a65e3b Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Thu, 2 Jun 2022 14:07:48 +0200 Subject: [PATCH 03/16] Added try-except block around the get_original_file_data() code to provide clear error message and exit the function early --- omero/import_scripts/Populate_Metadata.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py index d127bbe9f..c7f0727ea 100644 --- a/omero/import_scripts/Populate_Metadata.py +++ b/omero/import_scripts/Populate_Metadata.py @@ -31,6 +31,7 @@ import sys from omero.util.populate_roi import DownloadingOriginalFileProvider +from omero.util.populate_roi import DecodingError try: # Hopefully this will import @@ -120,7 +121,12 @@ def populate_metadata(client, conn, script_params): original_file = get_original_file( conn, data_type, object_id, file_ann_id) provider = DownloadingOriginalFileProvider(conn) - data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding) + try: + data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding) + except DecodingError as e: + e.add_note("The CSV file provided could not be decoded using the specified encoding. Please check the encoding and contents of the file!") + raise + temp_name = data_for_preprocessing.name # 5.9.1 returns NamedTempFile where name is a string. if isinstance(temp_name, int): From f75a8a482deac0f2f1a6f39b25f0d234320ef312 Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Thu, 2 Jun 2022 14:09:19 +0200 Subject: [PATCH 04/16] Rewrote encoding test case to check all available encodings on the machine. All test cases should either import without error or raise the correct error message --- test/integration/test_import_scripts.py | 157 +++++++++--------------- 1 file changed, 60 insertions(+), 97 deletions(-) diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py index fedf046f5..148a2d671 100644 --- a/test/integration/test_import_scripts.py +++ b/test/integration/test_import_scripts.py @@ -125,106 +125,69 @@ def test_populate_metadata_for_screen(self): assert message.getValue().startswith('Table data populated') conn.close() - def test_populate_metadata_for_cp1252(self): + def test_populate_metadata_for_encodings(self): sid = super(TestImportScripts, self).get_script(populate_metadata) assert sid > 0 - + import encodings + import os + import os + def encodinglist(): + r=[] + for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): + name=os.path.splitext(i)[0] + try: + "".encode(name) + except: + pass + else: + r.append(name.replace("_","-")) + return r + + + encodings = encodinglist() client, user = self.new_client_and_user() - update_service = client.getSession().getUpdateService() - plates = self.import_plates(client, plate_cols=3, plate_rows=1) - plate = plates[0] - name = plate.name.val - screen = omero.model.ScreenI() - screen.name = omero.rtypes.rstring("test_for_cp1252") - spl = omero.model.ScreenPlateLinkI() - spl.setParent(screen) - spl.setChild(plate) - spl = update_service.saveAndReturnObject(spl) - screen_id = spl.getParent().id.val - assert screen_id > 0 - assert spl.getChild().id.val == plate.id.val - - cvs_file = create_path("test_cp1252", ".csv") - - # create a file annotation - with open(cvs_file.abspath(), 'wb+') as f: - f.write("Well,Plate, Well Type, Facility-Salt-Batch-ID\n".encode("cp1252")) - f.write(("A01,%s,Treatment,FOOL10041-101-2\n" % name).encode("cp1252")) - f.write(("A02,%s,Control,\n" % name).encode("cp1252")) - f.write(("A03,%s,Treatment,FOOL10041-101-2\n" % name).encode("cp1252")) - conn = BlitzGateway(client_obj=client) - fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") - assert fa is not None - assert fa.id > 0 - link = omero.model.ScreenAnnotationLinkI() - link.setParent(omero.model.ScreenI(screen_id, False)) - link.setChild(omero.model.FileAnnotationI(fa.id, False)) - link = update_service.saveAndReturnObject(link) - assert link.id.val > 0 - # run the script - screen_ids = [] - screen_ids.append(spl.getParent().id) - - args = { - "Data_Type": omero.rtypes.rstring("Screen"), - "IDs": omero.rtypes.rlist(screen_ids), - "File_Annotation": omero.rtypes.rstring(str(fa.id)), - "CSV Encoding": omero.rtypes.rstring(str("cp1252")) - } - message = run_script(client, sid, args, "Message") - assert message is not None - assert message.getValue().startswith('Table data populated') - conn.close() - - def test_populate_metadata_for_iso8859(self): - sid = super(TestImportScripts, self).get_script(populate_metadata) - assert sid > 0 - - client, user = self.new_client_and_user() update_service = client.getSession().getUpdateService() - plates = self.import_plates(client, plate_cols=3, plate_rows=1) - plate = plates[0] - name = plate.name.val - screen = omero.model.ScreenI() - screen.name = omero.rtypes.rstring("test_for_iso8859") - spl = omero.model.ScreenPlateLinkI() - spl.setParent(screen) - spl.setChild(plate) - spl = update_service.saveAndReturnObject(spl) - screen_id = spl.getParent().id.val - assert screen_id > 0 - assert spl.getChild().id.val == plate.id.val - - cvs_file = create_path("test_iso8859", ".csv") - - # create a file annotation - with open(cvs_file.abspath(), 'wb+') as f: - f.write("Well,Plate, Well Type, Facility-Salt-Batch-ID\n".encode("iso-8859-1")) - f.write(("A01,%s,Treatment,FOOL10041-101-2\n" % name).encode("iso-8859-1")) - f.write(("A02,%s,Control,\n" % name).encode("iso-8859-1")) - f.write(("A03,%s,Treatment,FOOL10041-101-2\n" % name).encode("iso-8859-1")) - - conn = BlitzGateway(client_obj=client) - fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") - assert fa is not None - assert fa.id > 0 - link = omero.model.ScreenAnnotationLinkI() - link.setParent(omero.model.ScreenI(screen_id, False)) - link.setChild(omero.model.FileAnnotationI(fa.id, False)) - link = update_service.saveAndReturnObject(link) - assert link.id.val > 0 - # run the script - screen_ids = [] - screen_ids.append(spl.getParent().id) - - args = { - "Data_Type": omero.rtypes.rstring("Screen"), - "IDs": omero.rtypes.rlist(screen_ids), - "File_Annotation": omero.rtypes.rstring(str(fa.id)), - "CSV Encoding": omero.rtypes.rstring(str("iso-8859-1")) - } - message = run_script(client, sid, args, "Message") - assert message is not None - assert message.getValue().startswith('Table data populated') + + for enc in encodings: + plates = self.import_plates(client, plate_cols=3, plate_rows=1) + plate = plates[0] + name = plate.name.val + screen = omero.model.ScreenI() + screen.name = omero.rtypes.rstring("test_for_%s" % (enc)") + spl = omero.model.ScreenPlateLinkI() + spl.setParent(screen) + spl.setChild(plate) + spl = update_service.saveAndReturnObject(spl) + screen_id = spl.getParent().id.val + assert screen_id > 0 + assert spl.getChild().id.val == plate.id.val + cvs_file = create_path("test_cp1252", ".csv") + # create a file annotation + with open(cvs_file.abspath(), 'wb+') as f: + f.write("Well,Plate, Well Type, Facility-Salt-Batch-ID,Comment,\n".encode(enc)) + f.write(("A01,%s,Treatment,FOOL10041-101-2,TestString containing greek µ\n" % name).encode(enc)) + f.write(("A02,%s,Control,TestString containing symbol ±\n" % name).encode(enc)) + f.write(("A03,%s,Treatment,FOOL10041-101-2,TestString containing special character §\n" % name).encode(enc)) + fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") + assert fa is not None + assert fa.id > 0 + link = omero.model.ScreenAnnotationLinkI() + link.setParent(omero.model.ScreenI(screen_id, False)) + link.setChild(omero.model.FileAnnotationI(fa.id, False)) + link = update_service.saveAndReturnObject(link) + assert link.id.val > 0 + # run the script + screen_ids = [] + screen_ids.append(spl.getParent().id) + + args = { + "Data_Type": omero.rtypes.rstring("Screen"), + "IDs": omero.rtypes.rlist(screen_ids), + "File_Annotation": omero.rtypes.rstring(str(fa.id)), + "CSV Encoding": omero.rtypes.rstring(str(enc)) + } + message = run_script(client, sid, args, "Message") + assert message is not None + assert message.getValue().startswith('Table data populated') or message.getValue.startswith('The CSV file provided could not be decoded') conn.close() From 9921b3a0a9aed5daa5b75be3f49b9399e10f586f Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Thu, 2 Jun 2022 15:16:18 +0200 Subject: [PATCH 05/16] Added checks for correct omero-py version, that support encodings differing from utf-8. Refactored the creation of the scripts.client to allow for dynamic display of the encoding field only if support is available. Also: Switched free string input for encodings over to a list input based on the encodings available on the server. --- omero/import_scripts/Populate_Metadata.py | 65 +++++++++++++++++------ 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py index c7f0727ea..453d687a0 100644 --- a/omero/import_scripts/Populate_Metadata.py +++ b/omero/import_scripts/Populate_Metadata.py @@ -31,7 +31,6 @@ import sys from omero.util.populate_roi import DownloadingOriginalFileProvider -from omero.util.populate_roi import DecodingError try: # Hopefully this will import @@ -59,6 +58,36 @@ for additional features: https://pypi.org/project/omero-metadata/ """ +# Check if the populate_roi scripts was updated to include functionality for encodings other than utf-8 +# If yes, define a function to query all available encodings and set a flag +# If no, add information for the user + +try: + from omero.util.populate_roi import DecodingError +except ImportError: + encoding = 'utf-8' + EncSup = False + DEPRECATED += """ + + Warning: This script is using an omero-py version without support for different CSV encodings. + All CSV files will be assumed to be utf-8 encoded. If you need support for different encodings, + ask your administrator to update the installation. + """ +else: + EncSup = True + import os + def encodinglist(): + r=[] + for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): + name=os.path.splitext(i)[0] + try: + "".encode(name) + except: + pass + else: + r.append(name.replace("_","-")) + return r + def link_file_ann(conn, object_type, object_id, file_ann_id): """Link File Annotation to the Object, if not already linked.""" @@ -156,16 +185,8 @@ def populate_metadata(client, conn, script_params): def run_script(): data_types = [rstring(otype) for otype in OBJECT_TYPES] - client = scripts.client( - 'Populate_Metadata.py', - """ - This script processes a CSV file, using it to - 'populate' an OMERO.table, with one row per Image, Well or ROI. - The table data can then be displayed in the OMERO clients. - For full details of the supported CSV format, see - https://github.com/ome/omero-metadata/#populate - """ + DEPRECATED, - scripts.String( + + fields = [scripts.String( "Data_Type", optional=False, grouping="1", description="Choose source of images", values=data_types, default=OBJECT_TYPES[0]), @@ -177,15 +198,27 @@ def run_script(): scripts.String( "File_Annotation", grouping="3", description="File Annotation ID containing metadata to populate. " - "Note this is not the same as the File ID."), - - scripts.String( + "Note this is not the same as the File ID.")] + + # Only display the Encoding field if omero.util.populate_roi has support for different encodings + if EncSup: + fields.append( scripts.String( "CSV Encoding", grouping="4", description="""Encoding of the CSV File provided. Can depend on your system locale as well as the program used to generate the CSV File. E.g. Excel defaults to machine specific ANSI encoding during export to CSV (i.e. cp1252 on US machines, iso-8859-1 on german machines ...).""", - default="utf-8"), - + values=encodinglist(),default="utf-8")) + + client = scripts.client( + 'Populate_Metadata.py', + """ + This script processes a CSV file, using it to + 'populate' an OMERO.table, with one row per Image, Well or ROI. + The table data can then be displayed in the OMERO clients. + For full details of the supported CSV format, see + https://github.com/ome/omero-metadata/#populate + """ + DEPRECATED, + *fields, authors=["Emil Rozbicki", "OME Team"], institutions=["Glencoe Software Inc."], contact="ome-users@lists.openmicroscopy.org.uk", From 779cfd51e23f16995638683aa0c9b5bc8f859a86 Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Fri, 3 Jun 2022 15:32:41 +0200 Subject: [PATCH 06/16] Change detection of omero-py version over to a method that directly checks if get_original_file_data has the encoding argument. Check via import of DecodingError is not possible anymore since the custom class was removed and also not a direct proof that get_original_file_data supports the encoding argument. Also changed the respective error catch and refactored the encoding detection code. Change Error Catch to UnicodeDecodeError Fixed variable --- omero/import_scripts/Populate_Metadata.py | 36 ++++++++++------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py index 453d687a0..bfce3b5dd 100644 --- a/omero/import_scripts/Populate_Metadata.py +++ b/omero/import_scripts/Populate_Metadata.py @@ -62,32 +62,26 @@ # If yes, define a function to query all available encodings and set a flag # If no, add information for the user -try: - from omero.util.populate_roi import DecodingError -except ImportError: +if "encoding" in DownloadingOriginalFileProvider.get_original_file_data.__code__.co_varnames: + import os + EncSup = True + AvailEncodings= [] + for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): + name=os.path.splitext(i)[0] + try: + "".encode(name) + except: + pass + else: + AvailEncodings.append(name.replace("_","-")) +else: encoding = 'utf-8' EncSup = False DEPRECATED += """ - Warning: This script is using an omero-py version without support for different CSV encodings. All CSV files will be assumed to be utf-8 encoded. If you need support for different encodings, ask your administrator to update the installation. """ -else: - EncSup = True - import os - def encodinglist(): - r=[] - for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): - name=os.path.splitext(i)[0] - try: - "".encode(name) - except: - pass - else: - r.append(name.replace("_","-")) - return r - def link_file_ann(conn, object_type, object_id, file_ann_id): """Link File Annotation to the Object, if not already linked.""" @@ -152,7 +146,7 @@ def populate_metadata(client, conn, script_params): provider = DownloadingOriginalFileProvider(conn) try: data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding) - except DecodingError as e: + except UnicodeDecodeError as e: e.add_note("The CSV file provided could not be decoded using the specified encoding. Please check the encoding and contents of the file!") raise @@ -207,7 +201,7 @@ def run_script(): description="""Encoding of the CSV File provided. Can depend on your system locale as well as the program used to generate the CSV File. E.g. Excel defaults to machine specific ANSI encoding during export to CSV (i.e. cp1252 on US machines, iso-8859-1 on german machines ...).""", - values=encodinglist(),default="utf-8")) + values=AvailEncodings,default="utf-8")) client = scripts.client( 'Populate_Metadata.py', From 3627a2a80107dd63b9192becdfb9d902358c1f2b Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Fri, 3 Jun 2022 15:43:37 +0200 Subject: [PATCH 07/16] Added skip condition to encoding test, if omero-py does not support it --- test/integration/test_import_scripts.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py index 148a2d671..55d8f88f3 100644 --- a/test/integration/test_import_scripts.py +++ b/test/integration/test_import_scripts.py @@ -130,31 +130,33 @@ def test_populate_metadata_for_encodings(self): assert sid > 0 import encodings import os - import os - def encodinglist(): - r=[] - for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): + from omero.util.populate_roi import DownloadingOriginalFileProvider + + # Skip test if the omero-py version does not yet provide support for the encodings + if "encoding" in DownloadingOriginalFileProvider.get_original_file_data.__code__.co_varnames: + print("Skipping test of populate_metadata.py for encodings as omero-py version does not support it!") + return + + AvailEncodings=[] + for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): name=os.path.splitext(i)[0] try: "".encode(name) except: pass else: - r.append(name.replace("_","-")) - return r - - - encodings = encodinglist() + AvailEncodings.append(name.replace("_","-")) + client, user = self.new_client_and_user() conn = BlitzGateway(client_obj=client) update_service = client.getSession().getUpdateService() - for enc in encodings: + for enc in AvailEncodings: plates = self.import_plates(client, plate_cols=3, plate_rows=1) plate = plates[0] name = plate.name.val screen = omero.model.ScreenI() - screen.name = omero.rtypes.rstring("test_for_%s" % (enc)") + screen.name = omero.rtypes.rstring("test_for_%s" % (enc)) spl = omero.model.ScreenPlateLinkI() spl.setParent(screen) spl.setChild(plate) From 24901229a81badee3603c97378fdc741da1b47fc Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Fri, 3 Jun 2022 16:44:47 +0200 Subject: [PATCH 08/16] Keep encoding variable untouched in populate_metadata if EncSup = False --- omero/import_scripts/Populate_Metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py index bfce3b5dd..b5569f652 100644 --- a/omero/import_scripts/Populate_Metadata.py +++ b/omero/import_scripts/Populate_Metadata.py @@ -131,7 +131,7 @@ def populate_metadata(client, conn, script_params): object_ids = script_params["IDs"] object_id = object_ids[0] data_type = script_params["Data_Type"] - encoding = script_params["CSV Encoding"] + if encSup: encoding = script_params["CSV Encoding"] if data_type == "Image": try: from omero_metadata.populate import ImageWrapper # noqa: F401 From dc54b1323c3e0a2179bee0f71b6ea33b8cbc9d52 Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Tue, 7 Jun 2022 06:52:21 +0200 Subject: [PATCH 09/16] Fixed pytest complaints except two of the "line too long" PEP8ify statements, that would make the code harder to read. --- omero/import_scripts/Populate_Metadata.py | 45 ++++++++++++++--------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py index b5569f652..e80d13a3a 100644 --- a/omero/import_scripts/Populate_Metadata.py +++ b/omero/import_scripts/Populate_Metadata.py @@ -58,31 +58,34 @@ for additional features: https://pypi.org/project/omero-metadata/ """ -# Check if the populate_roi scripts was updated to include functionality for encodings other than utf-8 -# If yes, define a function to query all available encodings and set a flag +# Check if the populate_roi scripts was updated to include functionality for +# encodings other than utf-8. +# If yes, query all available encodings and set a flag # If no, add information for the user if "encoding" in DownloadingOriginalFileProvider.get_original_file_data.__code__.co_varnames: import os EncSup = True - AvailEncodings= [] + AvailEncodings = [] for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): - name=os.path.splitext(i)[0] + name = os.path.splitext(i)[0] try: "".encode(name) except: pass else: - AvailEncodings.append(name.replace("_","-")) + AvailEncodings.append(name.replace("_", "-")) else: encoding = 'utf-8' EncSup = False DEPRECATED += """ - Warning: This script is using an omero-py version without support for different CSV encodings. - All CSV files will be assumed to be utf-8 encoded. If you need support for different encodings, + Warning: This script is using an omero-py version without support + for different CSV encodings. All CSV files will be assumed to be + utf-8 encoded. If you need support for different encodings, ask your administrator to update the installation. """ + def link_file_ann(conn, object_type, object_id, file_ann_id): """Link File Annotation to the Object, if not already linked.""" file_ann = conn.getObject("Annotation", file_ann_id) @@ -131,7 +134,10 @@ def populate_metadata(client, conn, script_params): object_ids = script_params["IDs"] object_id = object_ids[0] data_type = script_params["Data_Type"] - if encSup: encoding = script_params["CSV Encoding"] + + if EncSup: #Only get from user if support for encoding is there + encoding = script_params["CSV Encoding"] + if data_type == "Image": try: from omero_metadata.populate import ImageWrapper # noqa: F401 @@ -147,9 +153,10 @@ def populate_metadata(client, conn, script_params): try: data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding) except UnicodeDecodeError as e: - e.add_note("The CSV file provided could not be decoded using the specified encoding. Please check the encoding and contents of the file!") - raise - + raise ValueError("The CSV file provided could not be decoded using " + "the specified encoding. Please check the encoding " + "and contents of the file!") from e + temp_name = data_for_preprocessing.name # 5.9.1 returns NamedTempFile where name is a string. if isinstance(temp_name, int): @@ -193,16 +200,18 @@ def run_script(): "File_Annotation", grouping="3", description="File Annotation ID containing metadata to populate. " "Note this is not the same as the File ID.")] - - # Only display the Encoding field if omero.util.populate_roi has support for different encodings + + # Add Encoding field if support for encodings if EncSup: fields.append( scripts.String( "CSV Encoding", grouping="4", - description="""Encoding of the CSV File provided. Can depend on your system locale - as well as the program used to generate the CSV File. E.g. Excel defaults to machine specific - ANSI encoding during export to CSV (i.e. cp1252 on US machines, iso-8859-1 on german machines ...).""", - values=AvailEncodings,default="utf-8")) - + description="""Encoding of the CSV File provided. Can depend on + your system locale as well as the program used to generate the + CSV File. E.g. Excel defaults to machine specific ANSI encoding + during export to CSV (i.e. cp1252 on US machines, + iso-8859-1 on german machines ...).""", + values=AvailEncodings, default="utf-8")) + client = scripts.client( 'Populate_Metadata.py', """ From 7f8e019c5bdd522ddcf0621fc7be3677a2ede30a Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Tue, 7 Jun 2022 06:56:09 +0200 Subject: [PATCH 10/16] More flake8 fixes --- omero/import_scripts/Populate_Metadata.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py index e80d13a3a..d7171942e 100644 --- a/omero/import_scripts/Populate_Metadata.py +++ b/omero/import_scripts/Populate_Metadata.py @@ -134,10 +134,10 @@ def populate_metadata(client, conn, script_params): object_ids = script_params["IDs"] object_id = object_ids[0] data_type = script_params["Data_Type"] - - if EncSup: #Only get from user if support for encoding is there + + if EncSup: # Only get from user if support for encoding is there encoding = script_params["CSV Encoding"] - + if data_type == "Image": try: from omero_metadata.populate import ImageWrapper # noqa: F401 @@ -154,8 +154,8 @@ def populate_metadata(client, conn, script_params): data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding) except UnicodeDecodeError as e: raise ValueError("The CSV file provided could not be decoded using " - "the specified encoding. Please check the encoding " - "and contents of the file!") from e + "the specified encoding. Please check the encoding " + "and contents of the file!") from e temp_name = data_for_preprocessing.name # 5.9.1 returns NamedTempFile where name is a string. @@ -203,7 +203,7 @@ def run_script(): # Add Encoding field if support for encodings if EncSup: - fields.append( scripts.String( + fields.append(scripts.String( "CSV Encoding", grouping="4", description="""Encoding of the CSV File provided. Can depend on your system locale as well as the program used to generate the From 074acba0b3b54604daeb29d60942100465f5c010 Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Tue, 7 Jun 2022 07:36:00 +0200 Subject: [PATCH 11/16] Fixed flake8 messages except for some line too longs, that are caused by writing strings to csv. --- test/integration/test_import_scripts.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py index 55d8f88f3..010c31318 100644 --- a/test/integration/test_import_scripts.py +++ b/test/integration/test_import_scripts.py @@ -128,24 +128,24 @@ def test_populate_metadata_for_screen(self): def test_populate_metadata_for_encodings(self): sid = super(TestImportScripts, self).get_script(populate_metadata) assert sid > 0 - import encodings import os from omero.util.populate_roi import DownloadingOriginalFileProvider - # Skip test if the omero-py version does not yet provide support for the encodings + # Skip test if the omero-py version does not support encodings if "encoding" in DownloadingOriginalFileProvider.get_original_file_data.__code__.co_varnames: - print("Skipping test of populate_metadata.py for encodings as omero-py version does not support it!") + print("Skipping test of populate_metadata.py for encodings" + "as omero-py version does not support it!") return - AvailEncodings=[] + AvailEncodings = [] for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): - name=os.path.splitext(i)[0] + name = os.path.splitext(i)[0] try: "".encode(name) except: pass else: - AvailEncodings.append(name.replace("_","-")) + AvailEncodings.append(name.replace("_", "-")) client, user = self.new_client_and_user() conn = BlitzGateway(client_obj=client) @@ -167,10 +167,10 @@ def test_populate_metadata_for_encodings(self): cvs_file = create_path("test_cp1252", ".csv") # create a file annotation with open(cvs_file.abspath(), 'wb+') as f: - f.write("Well,Plate, Well Type, Facility-Salt-Batch-ID,Comment,\n".encode(enc)) - f.write(("A01,%s,Treatment,FOOL10041-101-2,TestString containing greek µ\n" % name).encode(enc)) - f.write(("A02,%s,Control,TestString containing symbol ±\n" % name).encode(enc)) - f.write(("A03,%s,Treatment,FOOL10041-101-2,TestString containing special character §\n" % name).encode(enc)) + f.write("Well, Plate, Well Type, Facility-Salt-Batch-ID, Comment,\n".encode(enc)) + f.write(("A01, %s, Treatment, FOOL10041-101-2, TestString containing greek µ\n" % name).encode(enc)) + f.write(("A02, %s, Control, FOOL10041-101-2, TestString containing symbol ±\n" % name).encode(enc)) + f.write(("A03, %s, Treatment, FOOL10041-101-2,TestString containing special character §\n" % name).encode(enc)) fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") assert fa is not None assert fa.id > 0 From a971f2fcfee38202264215aa9b5754d07f0da9d8 Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Tue, 7 Jun 2022 07:38:22 +0200 Subject: [PATCH 12/16] Added try/except to catch the type-error thrown by Populate_Metadata.py to check for clean exit --- test/integration/test_import_scripts.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py index 010c31318..626ee0090 100644 --- a/test/integration/test_import_scripts.py +++ b/test/integration/test_import_scripts.py @@ -189,7 +189,11 @@ def test_populate_metadata_for_encodings(self): "File_Annotation": omero.rtypes.rstring(str(fa.id)), "CSV Encoding": omero.rtypes.rstring(str(enc)) } - message = run_script(client, sid, args, "Message") - assert message is not None - assert message.getValue().startswith('Table data populated') or message.getValue.startswith('The CSV file provided could not be decoded') + message = None + try: + message = run_script(client, sid, args, "Message") + assert message is not None + assert message.getValue().startswith('Table data populated') + except ValueError as e: + assert str(e).startswith('The CSV file provided could') conn.close() From b1b92a33eb2e55f4fb608da854bcace2765cc095 Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Tue, 7 Jun 2022 07:41:19 +0200 Subject: [PATCH 13/16] Added try around the string creation facility to skip test cases involving encodings that don't support the test strings --- test/integration/test_import_scripts.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py index 626ee0090..af930a730 100644 --- a/test/integration/test_import_scripts.py +++ b/test/integration/test_import_scripts.py @@ -165,12 +165,15 @@ def test_populate_metadata_for_encodings(self): assert screen_id > 0 assert spl.getChild().id.val == plate.id.val cvs_file = create_path("test_cp1252", ".csv") - # create a file annotation - with open(cvs_file.abspath(), 'wb+') as f: - f.write("Well, Plate, Well Type, Facility-Salt-Batch-ID, Comment,\n".encode(enc)) - f.write(("A01, %s, Treatment, FOOL10041-101-2, TestString containing greek µ\n" % name).encode(enc)) - f.write(("A02, %s, Control, FOOL10041-101-2, TestString containing symbol ±\n" % name).encode(enc)) - f.write(("A03, %s, Treatment, FOOL10041-101-2,TestString containing special character §\n" % name).encode(enc)) + # create a file annotation. + try: + with open(cvs_file.abspath(), 'wb+') as f: + f.write("Well, Plate, Well Type, Facility-Salt-Batch-ID, Comment,\n".encode(enc)) + f.write(("A01, %s, Treatment, FOOL10041-101-2, TestString containing greek µ\n" % name).encode(enc)) + f.write(("A02, %s, Control, FOOL10041-101-2, TestString containing symbol ±\n" % name).encode(enc)) + f.write(("A03, %s, Treatment, FOOL10041-101-2,TestString containing special character §\n" % name).encode(enc)) + except UnicodeError: #Skip if test strings are not supported + next fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") assert fa is not None assert fa.id > 0 From 0d8672e4daaf7d7cb709d7405c21caa1633ea06e Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Tue, 7 Jun 2022 07:45:00 +0200 Subject: [PATCH 14/16] More flake8 fixes --- test/integration/test_import_scripts.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py index af930a730..9df2ea1f4 100644 --- a/test/integration/test_import_scripts.py +++ b/test/integration/test_import_scripts.py @@ -130,13 +130,13 @@ def test_populate_metadata_for_encodings(self): assert sid > 0 import os from omero.util.populate_roi import DownloadingOriginalFileProvider - + # Skip test if the omero-py version does not support encodings if "encoding" in DownloadingOriginalFileProvider.get_original_file_data.__code__.co_varnames: print("Skipping test of populate_metadata.py for encodings" "as omero-py version does not support it!") return - + AvailEncodings = [] for i in os.listdir(os.path.split(__import__("encodings").__file__)[0]): name = os.path.splitext(i)[0] @@ -146,11 +146,11 @@ def test_populate_metadata_for_encodings(self): pass else: AvailEncodings.append(name.replace("_", "-")) - + client, user = self.new_client_and_user() conn = BlitzGateway(client_obj=client) update_service = client.getSession().getUpdateService() - + for enc in AvailEncodings: plates = self.import_plates(client, plate_cols=3, plate_rows=1) plate = plates[0] @@ -165,14 +165,14 @@ def test_populate_metadata_for_encodings(self): assert screen_id > 0 assert spl.getChild().id.val == plate.id.val cvs_file = create_path("test_cp1252", ".csv") - # create a file annotation. + # create a file annotation. try: with open(cvs_file.abspath(), 'wb+') as f: f.write("Well, Plate, Well Type, Facility-Salt-Batch-ID, Comment,\n".encode(enc)) f.write(("A01, %s, Treatment, FOOL10041-101-2, TestString containing greek µ\n" % name).encode(enc)) f.write(("A02, %s, Control, FOOL10041-101-2, TestString containing symbol ±\n" % name).encode(enc)) f.write(("A03, %s, Treatment, FOOL10041-101-2,TestString containing special character §\n" % name).encode(enc)) - except UnicodeError: #Skip if test strings are not supported + except UnicodeError: # Skip if test strings are not supported next fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") assert fa is not None @@ -185,7 +185,7 @@ def test_populate_metadata_for_encodings(self): # run the script screen_ids = [] screen_ids.append(spl.getParent().id) - + args = { "Data_Type": omero.rtypes.rstring("Screen"), "IDs": omero.rtypes.rlist(screen_ids), From 9055d7662787af9c580020a97922b63cfed2f304 Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Tue, 7 Jun 2022 07:53:38 +0200 Subject: [PATCH 15/16] More flake8 fixes --- test/integration/test_import_scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py index 9df2ea1f4..36f8836af 100644 --- a/test/integration/test_import_scripts.py +++ b/test/integration/test_import_scripts.py @@ -185,7 +185,7 @@ def test_populate_metadata_for_encodings(self): # run the script screen_ids = [] screen_ids.append(spl.getParent().id) - + args = { "Data_Type": omero.rtypes.rstring("Screen"), "IDs": omero.rtypes.rlist(screen_ids), From c322eb4f439f047670d6c9843a47df19a853323a Mon Sep 17 00:00:00 2001 From: Julian Hniopek Date: Tue, 7 Jun 2022 07:56:15 +0200 Subject: [PATCH 16/16] Changed Caught Exception type, as omero-py upstream changed --- omero/import_scripts/Populate_Metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py index d7171942e..11d961378 100644 --- a/omero/import_scripts/Populate_Metadata.py +++ b/omero/import_scripts/Populate_Metadata.py @@ -152,7 +152,7 @@ def populate_metadata(client, conn, script_params): provider = DownloadingOriginalFileProvider(conn) try: data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding) - except UnicodeDecodeError as e: + except ValueError as e: raise ValueError("The CSV file provided could not be decoded using " "the specified encoding. Please check the encoding " "and contents of the file!") from e