Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Quality Control Automation Program

This is the current working version of the Quality Control Program designed to fit into Schaffer Library's digitization workflow.
This is the current working version of the Quality Control Program designed to fit into Schaffer Library's digitization workflow. If you're interested in how this project was created and how it has evolved over time, please see our article: [Quality Control Automation for Student Driven Digitization Workflows](https://journal.code4lib.org/articles/18340)

## Setup

Expand All @@ -14,7 +14,12 @@ The program works with python 3.12.5, and will likely work with later versions o

## Features

1. Spreadsheet checks - this function checks for:
1. Spreadsheet generation - this function:
* Asks the user for a filename and sheet names
* Generates a spreadsheet in the Union College ARCHES format (for easy upload to our file system)
* Places the output file on Desktop, expands the columns to make input easier, and opens the file

2. Spreadsheet checks - this function checks for:
* Incorrect date formats. Highlighted or corrected based on how extreme the issue is.
* Mismatched locations and filenames.
* Duplicate filenames.
Expand All @@ -34,4 +39,4 @@ The object file structure lends itself well to a more complete quality control p

## Testing

Given the limited output of the digitization department at Union College we have only been able to test the program on a limited number of records (circa 500.) If you discover any issues please add these to the Issues section in github and we will work to resolve it.
Given the limited output of the digitization department at Union College, we have only been able to test the program on a limited number of records (circa 500). If you discover any issues, please add them to the Issues section on GitHub and we will work to resolve them.
53 changes: 41 additions & 12 deletions app/src/fileHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,17 @@ def set_field_format(ws, column_name, column_index):
cell.alignment = Alignment(horizontal='right')
cell.number_format = "YYYY-MM-DD"

# Test this
# Can we call row highlighter from this method?
# ^^ Shouldn't do it this way because sheet not written to for spreadsheetChecks only PreliminaryQC

"""Writes the edited excelfile over the original version
Resets the original colors of the spreadsheet based on current error/fail colors
and the cached colors saved to file. Sets the date format to the ISO format
required for upload to ARCHES
Args:
ExcelFile: the current file after all QC changes were made
Returns:
True: if the save is completed
False: if the file is open in editor so the user can be made aware with an error message
"""
def write_excelfile(ExcelFile):
wb = openpyxl.load_workbook(ExcelFile.filePath)

Expand Down Expand Up @@ -149,8 +157,16 @@ def write_excelfile(ExcelFile):
return True
except:
return False



"""Determines whether the filename selected by the user already has a file extension
Args:
filename: the name selected by the user in the GUI
Returns:
True if this extension is an excel file
False if the file does not contain an extension
ValueError if the file has an extension but it is not in the proper format
"""
def extract_ext(filename):
if '.' in filename:
ext = filename.split('.')[-1]
Expand All @@ -161,13 +177,23 @@ def extract_ext(filename):
else:
return False

"""Simple Tkinter window opened if the file already exists
Asks the user if they would like to overwrite
Returns the choice which causes the spreadsheet creation to proceed or cancel
"""
def ask_yes_cancel(title="Confirm", message="This file already exists. Would you like to overwrite?"):
    """Ask the user a yes/no/cancel question in a standalone message box.

    A temporary Tk root window is created and immediately hidden so that only
    the message box is shown. The root is destroyed once the dialog closes.

    Args:
        title: caption shown on the message box
        message: question displayed to the user
    Returns:
        The result of messagebox.askyesnocancel: True for "Yes",
        False for "No", None for "Cancel"
    """
    root = tk.Tk()
    # Hide the empty root window; only the dialog itself should be visible.
    root.withdraw()
    try:
        return messagebox.askyesnocancel(title, message)
    finally:
        # Always tear down the hidden root, even if the dialog raises,
        # so no invisible Tk window is left alive.
        root.destroy()

"""Creates a spreadsheet based on the template at Union College
Adds as many sheets as requested by the user with their respective names
Adds the fields currently used for digitization as headers
Resizes the columns to improve readability
"""
def generateSpreadsheet(filename, sheetnames):
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")

Expand All @@ -189,16 +215,19 @@ def generateSpreadsheet(filename, sheetnames):
default_sheet = wb.active
wb.remove(default_sheet)

headers = ['contributor', 'contributor_role', 'subjects_personal_names',
'Language', 'publisher', 'date_created_free', 'date_created', 'genre',
'rights_statements', 'extent (total page count including covers)',
'Physical Location', 'Scanning Instructions', 'Filename', 'date_digital',
'Scanner Initials', 'QC Pass/Fail', 'QC Initials', 'QC Comments']
headers = ['ismemberof', 'aspace_id', 'documents', 'local_identifier', 'aspaceTitle',
'label (title)', 'titleProper', 'creator', 'creator_role', 'contributor',
'contributor_role', 'subjects_personal_names','Language', 'publisher',
'date_created_free', 'date_created', 'genre', 'rights_statements',
'extent (total page count including covers)', 'Physical Location',
'Scanning Instructions', 'Filename', 'date_digital','Scanner Initials',
'QC Pass/Fail', 'QC Initials', 'QC Comments']

column_widths = {
'A': 22, 'B': 15, 'C': 27, 'D': 32, 'E': 32, 'F': 35, 'G': 37,
'H': 14, 'I': 40, 'J': 23, 'K': 25, 'L': 23, 'M': 17, 'N': 23,
'O': 23, 'P': 23, 'Q': 23, 'R': 23
'A': 12, 'B': 12, 'C': 12, 'D': 12, 'E': 12, 'F': 28, 'G': 12, 'H': 70,
'I': 30, 'J': 12, 'K': 22, 'L': 15, 'M': 27, 'N': 32, 'O': 32, 'P': 35,
'Q': 37, 'R': 14, 'S': 40, 'T': 23, 'U': 25, 'V': 23, 'W': 17, 'X': 23,
'Y': 23, 'Z': 23, 'AA': 23
}

for name in sheetnames:
Expand Down
6 changes: 2 additions & 4 deletions app/src/main.pyw
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class GenerateSpreadsheetDialog(QtWidgets.QDialog):
pr = parent.mapToGlobal(pg.topRight())
self.move(pr.x() - self.width() + 20, pr.y() + 20)

# reuse main-window button style if available

self.button_style = getattr(parent, 'button_style', """
QPushButton {
background-color: rgb(225, 225, 225);
Expand All @@ -110,7 +110,6 @@ class GenerateSpreadsheetDialog(QtWidgets.QDialog):
}
""")

# Filename input
lbl_file = QtWidgets.QLabel("Filename:", self)
lbl_file.setGeometry(10, 10, 200, 20)
self.filenameEdit = QtWidgets.QLineEdit(self)
Expand All @@ -123,7 +122,6 @@ class GenerateSpreadsheetDialog(QtWidgets.QDialog):
""")
self.filenameEdit.setPlaceholderText("e.g. output.xlsx")

# Single-sheet entry + Add button
lbl_sheet = QtWidgets.QLabel("Sheet Name:", self)
lbl_sheet.setGeometry(10, 70, 100, 20)
self.sheetNameEdit = QtWidgets.QLineEdit(self)
Expand All @@ -134,7 +132,7 @@ class GenerateSpreadsheetDialog(QtWidgets.QDialog):
border-radius: 6px;
padding: 2px;
""")
self.sheetNameEdit.setPlaceholderText("e.g. Sheet1")
self.sheetNameEdit.setPlaceholderText("e.g. Box 1")
self.sheetNameEdit.textChanged.connect(self._updateAddState)

self.addSheetButton = QtWidgets.QPushButton("Add ▶", self)
Expand Down