diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py
index 168e80632..11d961378 100644
--- a/omero/import_scripts/Populate_Metadata.py
+++ b/omero/import_scripts/Populate_Metadata.py
@@ -58,6 +58,52 @@
 for additional features: https://pypi.org/project/omero-metadata/
 """
 
+# Check whether the installed omero-py can decode CSV files in encodings
+# other than utf-8, i.e. whether get_original_file_data() accepts an
+# "encoding" parameter.
+# If yes, query all available encodings and set a flag.
+# If no, add information for the user.
+
+
+def _supports_encoding():
+    """Return True if get_original_file_data() takes an "encoding" argument.
+
+    The signature is inspected instead of ``__code__.co_varnames`` because
+    the latter also lists local variables of the function.
+    """
+    import inspect
+    method = DownloadingOriginalFileProvider.get_original_file_data
+    return "encoding" in inspect.signature(method).parameters
+
+
+def _available_encodings():
+    """Return the sorted names of all text encodings this Python provides."""
+    import encodings
+    import pkgutil
+    names = []
+    for _, name, _ in pkgutil.iter_modules(encodings.__path__):
+        try:
+            "".encode(name)
+        except Exception:
+            # Helper modules, bytes-only codecs and codecs missing on this
+            # platform cannot encode text; probing is best effort, skip them.
+            continue
+        names.append(name.replace("_", "-"))
+    return sorted(names)
+
+
+EncSup = _supports_encoding()
+if EncSup:
+    AvailEncodings = _available_encodings()
+else:
+    AvailEncodings = []
+    DEPRECATED += """
+    Warning: This script is using an omero-py version without support
+    for different CSV encodings. All CSV files will be assumed to be
+    utf-8 encoded. If you need support for different encodings,
+    ask your administrator to update the installation.
+    """
+
 
 def link_file_ann(conn, object_type, object_id, file_ann_id):
     """Link File Annotation to the Object, if not already linked."""
@@ -108,6 +154,12 @@ def populate_metadata(client, conn, script_params):
     object_id = object_ids[0]
     data_type = script_params["Data_Type"]
 
+    # Only forward an encoding when omero-py supports it: older versions do
+    # not accept the keyword and the "CSV Encoding" field is not offered.
+    provider_kwargs = {}
+    if EncSup and "CSV Encoding" in script_params:
+        provider_kwargs["encoding"] = script_params["CSV Encoding"]
+
     if data_type == "Image":
         try:
             from omero_metadata.populate import ImageWrapper  # noqa: F401
@@ -120,7 +172,14 @@ def populate_metadata(client, conn, script_params):
     original_file = get_original_file(
         conn, data_type, object_id, file_ann_id)
     provider = DownloadingOriginalFileProvider(conn)
-    data_for_preprocessing = provider.get_original_file_data(original_file)
+    try:
+        data_for_preprocessing = provider.get_original_file_data(
+            original_file, **provider_kwargs)
+    except ValueError as e:
+        raise ValueError("The CSV file provided could not be decoded using "
+                         "the specified encoding. Please check the encoding "
+                         "and contents of the file!") from e
+
     temp_name = data_for_preprocessing.name
     # 5.9.1 returns NamedTempFile where name is a string.
     if isinstance(temp_name, int):
@@ -150,16 +209,8 @@ def populate_metadata(client, conn, script_params):
 def run_script():
 
     data_types = [rstring(otype) for otype in OBJECT_TYPES]
-    client = scripts.client(
-        'Populate_Metadata.py',
-        """
-    This script processes a CSV file, using it to
-    'populate' an OMERO.table, with one row per Image, Well or ROI.
-    The table data can then be displayed in the OMERO clients.
-    For full details of the supported CSV format, see
-    https://github.com/ome/omero-metadata/#populate
-        """ + DEPRECATED,
-        scripts.String(
+
+    fields = [scripts.String(
             "Data_Type", optional=False, grouping="1",
             description="Choose source of images",
             values=data_types, default=OBJECT_TYPES[0]),
@@ -171,8 +222,29 @@ def run_script():
         scripts.String(
             "File_Annotation", grouping="3",
             description="File Annotation ID containing metadata to populate. "
-            "Note this is not the same as the File ID."),
+            "Note this is not the same as the File ID.")]
+
+    # Add Encoding field if support for encodings
+    if EncSup:
+        fields.append(scripts.String(
+            "CSV Encoding", grouping="4",
+            description="""Encoding of the CSV File provided. Can depend on
+            your system locale as well as the program used to generate the
+            CSV File. E.g. Excel defaults to machine specific ANSI encoding
+            during export to CSV (i.e. cp1252 on US machines,
+            iso-8859-1 on german machines ...).""",
+            values=AvailEncodings, default="utf-8"))
 
+    client = scripts.client(
+        'Populate_Metadata.py',
+        """
+    This script processes a CSV file, using it to
+    'populate' an OMERO.table, with one row per Image, Well or ROI.
+    The table data can then be displayed in the OMERO clients.
+    For full details of the supported CSV format, see
+    https://github.com/ome/omero-metadata/#populate
+        """ + DEPRECATED,
+        *fields,
         authors=["Emil Rozbicki", "OME Team"],
         institutions=["Glencoe Software Inc."],
         contact="ome-users@lists.openmicroscopy.org.uk",
diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py
index 3cc8b8d2b..36f8836af 100644
--- a/test/integration/test_import_scripts.py
+++ b/test/integration/test_import_scripts.py
@@ -124,3 +124,91 @@ def test_populate_metadata_for_screen(self):
         assert message is not None
         assert message.getValue().startswith('Table data populated')
         conn.close()
+
+    def test_populate_metadata_for_encodings(self):
+        """Run Populate_Metadata on a CSV written in each text encoding."""
+        sid = super(TestImportScripts, self).get_script(populate_metadata)
+        assert sid > 0
+        import encodings
+        import inspect
+        import pkgutil
+        from omero.util.populate_roi import DownloadingOriginalFileProvider
+
+        # Skip test if the omero-py version does not support encodings
+        method = DownloadingOriginalFileProvider.get_original_file_data
+        if "encoding" not in inspect.signature(method).parameters:
+            print("Skipping test of populate_metadata.py for encodings "
+                  "as omero-py version does not support it!")
+            return
+
+        AvailEncodings = []
+        for _, codec, _ in pkgutil.iter_modules(encodings.__path__):
+            try:
+                "".encode(codec)
+            except Exception:
+                # Not a usable text codec on this platform
+                continue
+            AvailEncodings.append(codec.replace("_", "-"))
+
+        client, user = self.new_client_and_user()
+        conn = BlitzGateway(client_obj=client)
+        update_service = client.getSession().getUpdateService()
+
+        for enc in AvailEncodings:
+            plates = self.import_plates(client, plate_cols=3, plate_rows=1)
+            plate = plates[0]
+            name = plate.name.val
+            screen = omero.model.ScreenI()
+            screen.name = omero.rtypes.rstring("test_for_%s" % (enc))
+            spl = omero.model.ScreenPlateLinkI()
+            spl.setParent(screen)
+            spl.setChild(plate)
+            spl = update_service.saveAndReturnObject(spl)
+            screen_id = spl.getParent().id.val
+            assert screen_id > 0
+            assert spl.getChild().id.val == plate.id.val
+            cvs_file = create_path("test_cp1252", ".csv")
+            # create a file annotation.
+            rows = [
+                "Well, Plate, Well Type, Facility-Salt-Batch-ID, Comment,\n",
+                "A01, %s, Treatment, FOOL10041-101-2, "
+                "TestString containing greek µ\n" % name,
+                "A02, %s, Control, FOOL10041-101-2, "
+                "TestString containing symbol ±\n" % name,
+                "A03, %s, Treatment, FOOL10041-101-2,"
+                "TestString containing special character §\n" % name,
+            ]
+            try:
+                # Encode the whole text at once so that codecs writing a
+                # BOM (utf-16, utf-32, utf-8-sig) emit it only once.
+                data = "".join(rows).encode(enc)
+            except UnicodeError:  # Skip if test strings are not supported
+                continue
+            with open(cvs_file.abspath(), 'wb') as f:
+                f.write(data)
+            fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv")
+            assert fa is not None
+            assert fa.id > 0
+            link = omero.model.ScreenAnnotationLinkI()
+            link.setParent(omero.model.ScreenI(screen_id, False))
+            link.setChild(omero.model.FileAnnotationI(fa.id, False))
+            link = update_service.saveAndReturnObject(link)
+            assert link.id.val > 0
+            # run the script
+            screen_ids = []
+            screen_ids.append(spl.getParent().id)
+
+            args = {
+                "Data_Type": omero.rtypes.rstring("Screen"),
+                "IDs": omero.rtypes.rlist(screen_ids),
+                "File_Annotation": omero.rtypes.rstring(str(fa.id)),
+                "CSV Encoding": omero.rtypes.rstring(str(enc))
+            }
+            message = None
+            try:
+                message = run_script(client, sid, args, "Message")
+                assert message is not None
+                assert message.getValue().startswith('Table data populated')
+            except ValueError as e:
+                assert str(e).startswith('The CSV file provided could')
+        conn.close()