From c120bbf378d5f527e0b92a237d96fde055c93a82 Mon Sep 17 00:00:00 2001 From: James Gaskell Date: Mon, 12 May 2025 12:35:10 -0400 Subject: [PATCH 1/6] added documentation --- app/src/fileHandler.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/app/src/fileHandler.py b/app/src/fileHandler.py index 85439b1..61a53c5 100644 --- a/app/src/fileHandler.py +++ b/app/src/fileHandler.py @@ -149,8 +149,16 @@ def write_excelfile(ExcelFile): return True except: return False - + +"""Determines whether the filename selected by the user already has a file extension +Args: + filename: the name selected by the user in the GUI +Returns: + True if this extension is an excel file + False if the file does not contain an extension + ValueError if the file has an extension but it is not in the proper format +""" def extract_ext(filename): if '.' in filename: ext = filename.split('.')[-1] @@ -162,6 +170,7 @@ def extract_ext(filename): return False def ask_yes_cancel(title="Confirm", message="This file already exists. 
Would you like to overwrite?"): + """Simple """ root = tk.Tk() root.withdraw() result = messagebox.askyesnocancel(title, message) @@ -189,11 +198,13 @@ def generateSpreadsheet(filename, sheetnames): default_sheet = wb.active wb.remove(default_sheet) - headers = ['contributor', 'contributor_role', 'subjects_personal_names', - 'Language', 'publisher', 'date_created_free', 'date_created', 'genre', - 'rights_statements', 'extent (total page count including covers)', - 'Physical Location', 'Scanning Instructions', 'Filename', 'date_digital', - 'Scanner Initials', 'QC Pass/Fail', 'QC Initials', 'QC Comments'] + headers = ['ismemberof', 'aspace_id', 'documents', 'local_identifier', 'aspaceTitle', + 'label (title)', 'titleProper', 'creator', 'creator_role', 'contributor', + 'contributor_role', 'subjects_personal_names','Language', 'publisher', + 'date_created_free', 'date_created', 'genre', 'rights_statements', + 'extent (total page count including covers)', 'Physical Location', + 'Scanning Instructions', 'Filename', 'date_digital','Scanner Initials', + 'QC Pass/Fail', 'QC Initials', 'QC Comments'] column_widths = { 'A': 22, 'B': 15, 'C': 27, 'D': 32, 'E': 32, 'F': 35, 'G': 37, @@ -204,8 +215,9 @@ def generateSpreadsheet(filename, sheetnames): for name in sheetnames: ws = wb.create_sheet(title=name) ws.append(headers) - for column, width in column_widths.items(): - ws.column_dimensions[column].width = width + for col_idx in range(1, ws.max_column + 1): + column_letter = get_column_letter(col_idx) + ws.column_dimensions[column_letter].width = 20 # Set your desired width wb.save(filepath) From 5e79bcc94349974110e8b0d90231db315381538b Mon Sep 17 00:00:00 2001 From: James Gaskell Date: Mon, 12 May 2025 12:50:50 -0400 Subject: [PATCH 2/6] cleanup and createSpreadsheet bug fixes --- app/src/fileHandler.py | 29 +++++++++++++++++++++++------ app/src/main.pyw | 6 ++---- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/app/src/fileHandler.py b/app/src/fileHandler.py 
index 61a53c5..655603d 100644 --- a/app/src/fileHandler.py +++ b/app/src/fileHandler.py @@ -118,9 +118,17 @@ def set_field_format(ws, column_name, column_index): cell.alignment = Alignment(horizontal='right') cell.number_format = "YYYY-MM-DD" -# Test this -# Can we call row highlighter from this method? -# ^^ Shouldn't do it this way because sheet not written to for spreadsheetChecks only PreliminaryQC + +"""Writes the edited excelfile over the original version + Resets the original colors of the spreadsheet based on current error/fail colors + and the cached colors saved to file. Sets the date format to the requires ISO + for upload to ARCHES +Args: + ExcelFile: the current file after all QC changes were made +Returns: + True: if the save is completed + False: if the file is open in editor so the user can be made aware with an error message +""" def write_excelfile(ExcelFile): wb = openpyxl.load_workbook(ExcelFile.filePath) @@ -169,6 +177,10 @@ def extract_ext(filename): else: return False +"""Simple Tkinter window opened if the file already exists + Asks the user if they would like to overwrite + Returns the choice which causes the spreadsheet creation to proceed or cancel +""" def ask_yes_cancel(title="Confirm", message="This file already exists. Would you like to overwrite?"): """Simple """ root = tk.Tk() @@ -177,6 +189,11 @@ def ask_yes_cancel(title="Confirm", message="This file already exists. 
Would you root.destroy() return result +"""Creates a spreadsheet based on the template at Union College + Adds as many sheets as requested by the user with their respective names + Adds the fields currently used for digitization as headers + Resizes the columns to improve readability +""" def generateSpreadsheet(filename, sheetnames): desktop_path = os.path.join(os.path.expanduser("~"), "Desktop") @@ -207,9 +224,9 @@ def generateSpreadsheet(filename, sheetnames): 'QC Pass/Fail', 'QC Initials', 'QC Comments'] column_widths = { - 'A': 22, 'B': 15, 'C': 27, 'D': 32, 'E': 32, 'F': 35, 'G': 37, - 'H': 14, 'I': 40, 'J': 23, 'K': 25, 'L': 23, 'M': 17, 'N': 23, - 'O': 23, 'P': 23, 'Q': 23, 'R': 23 + 'A': 12, 'B': 12, 'C': 12, 'D': 12, 'E': 12, 'F': 28, 'G': 12, 'H': 70, 'I': 30, 'J': 12, 'K': 22, 'L': 15, 'M': 27, 'N': 32, 'O': 32, 'P': 35, 'Q': 37, + 'R': 14, 'S': 40, 'T': 23, 'U': 25, 'V': 23, 'W': 17, 'X': 23, + 'Y': 23, 'Z': 23, 'AA': 23 } for name in sheetnames: diff --git a/app/src/main.pyw b/app/src/main.pyw index 1f58fe4..26b5211 100644 --- a/app/src/main.pyw +++ b/app/src/main.pyw @@ -95,7 +95,7 @@ class GenerateSpreadsheetDialog(QtWidgets.QDialog): pr = parent.mapToGlobal(pg.topRight()) self.move(pr.x() - self.width() + 20, pr.y() + 20) - # reuse main-window button style if available + self.button_style = getattr(parent, 'button_style', """ QPushButton { background-color: rgb(225, 225, 225); @@ -110,7 +110,6 @@ class GenerateSpreadsheetDialog(QtWidgets.QDialog): } """) - # Filename input lbl_file = QtWidgets.QLabel("Filename:", self) lbl_file.setGeometry(10, 10, 200, 20) self.filenameEdit = QtWidgets.QLineEdit(self) @@ -123,7 +122,6 @@ class GenerateSpreadsheetDialog(QtWidgets.QDialog): """) self.filenameEdit.setPlaceholderText("e.g. 
output.xlsx") - # Single-sheet entry + Add button lbl_sheet = QtWidgets.QLabel("Sheet Name:", self) lbl_sheet.setGeometry(10, 70, 100, 20) self.sheetNameEdit = QtWidgets.QLineEdit(self) @@ -134,7 +132,7 @@ class GenerateSpreadsheetDialog(QtWidgets.QDialog): border-radius: 6px; padding: 2px; """) - self.sheetNameEdit.setPlaceholderText("e.g. Sheet1") + self.sheetNameEdit.setPlaceholderText("e.g. Box 1") self.sheetNameEdit.textChanged.connect(self._updateAddState) self.addSheetButton = QtWidgets.QPushButton("Add ▶", self) From 8d5982c9dad4e2983c10bd108b80e328cfb30843 Mon Sep 17 00:00:00 2001 From: James Gaskell Date: Mon, 12 May 2025 12:52:26 -0400 Subject: [PATCH 3/6] tiny cormat change for readability --- app/src/fileHandler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/src/fileHandler.py b/app/src/fileHandler.py index 655603d..c495ce3 100644 --- a/app/src/fileHandler.py +++ b/app/src/fileHandler.py @@ -224,8 +224,9 @@ def generateSpreadsheet(filename, sheetnames): 'QC Pass/Fail', 'QC Initials', 'QC Comments'] column_widths = { - 'A': 12, 'B': 12, 'C': 12, 'D': 12, 'E': 12, 'F': 28, 'G': 12, 'H': 70, 'I': 30, 'J': 12, 'K': 22, 'L': 15, 'M': 27, 'N': 32, 'O': 32, 'P': 35, 'Q': 37, - 'R': 14, 'S': 40, 'T': 23, 'U': 25, 'V': 23, 'W': 17, 'X': 23, + 'A': 12, 'B': 12, 'C': 12, 'D': 12, 'E': 12, 'F': 28, 'G': 12, 'H': 70, + 'I': 30, 'J': 12, 'K': 22, 'L': 15, 'M': 27, 'N': 32, 'O': 32, 'P': 35, + 'Q': 37, 'R': 14, 'S': 40, 'T': 23, 'U': 25, 'V': 23, 'W': 17, 'X': 23, 'Y': 23, 'Z': 23, 'AA': 23 } From 0c6214a769618684cb869dd050c6fa96930fa3b1 Mon Sep 17 00:00:00 2001 From: James Gaskell Date: Mon, 12 May 2025 12:54:53 -0400 Subject: [PATCH 4/6] fixed column expansion --- app/src/fileHandler.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/src/fileHandler.py b/app/src/fileHandler.py index c495ce3..12a685a 100644 --- a/app/src/fileHandler.py +++ b/app/src/fileHandler.py @@ -233,9 +233,8 @@ def 
generateSpreadsheet(filename, sheetnames): for name in sheetnames: ws = wb.create_sheet(title=name) ws.append(headers) - for col_idx in range(1, ws.max_column + 1): - column_letter = get_column_letter(col_idx) - ws.column_dimensions[column_letter].width = 20 # Set your desired width + for column, width in column_widths.items(): + ws.column_dimensions[column].width = width wb.save(filepath) From 577cd8f3eedda818fe53ba0cf8097d1730feac30 Mon Sep 17 00:00:00 2001 From: Gaskellj <114235639+Gaskellj@users.noreply.github.com> Date: Mon, 12 May 2025 13:00:13 -0400 Subject: [PATCH 5/6] Added link to Code4Lib article --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7176a87..74b825c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Quality Control Automation Program -This is the current working version of the Quality Control Program designed to fit into Schaffer Library's digitization workflow. +This is the current working version of the Quality Control Program designed to fit into Schaffer Library's digitization workflow. If you're interested in how this project was created and how it has evolved over time, please see our article: [Quality Control Automation for Student Driven Digitization Workflows](https://journal.code4lib.org/articles/18340) ## Setup @@ -34,4 +34,4 @@ The object file structure lends itself well to a more complete quality control p ## Testing -Given the limited output of the digitization department at Union College we have only been able to test the program on a limited number of records (circa 500.) If you discover any issues please add these to the Issues section in github and we will work to resolve it. \ No newline at end of file +Given the limited output of the digitization department at Union College we have only been able to test the program on a limited number of records (circa 500.)
If you discover any issues please add these to the Issues section in GitHub and we will work to resolve them. From b013c0cfb8175c7cc60de6d4b47b12b813124ed3 Mon Sep 17 00:00:00 2001 From: Gaskellj <114235639+Gaskellj@users.noreply.github.com> Date: Mon, 12 May 2025 13:03:09 -0400 Subject: [PATCH 6/6] added spreadsheet gen to readme --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 74b825c..dd00a0a 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,12 @@ The program works with python 3.12.5, and will likely work with later versions o ## Features -1. Spreadsheet checks - this function checks for: +1. Spreadsheet generation - this function: + * Asks the user for a filename and sheet names + * Generates a spreadsheet in the Union College ARCHES format (for easy upload to our file system) + * Places the output file on the Desktop, expands the columns to make input easier, and opens the file + +2. Spreadsheet checks - this function checks for: * Incorrect date formats. Highlighted or corrected based on how extreme the issue is. * Mismacthed locations and filenames. * Duplicate filenames.