diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..ddfd64d
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "open-oni"]
+    path = open-oni
+    url = git@github.com:codemyriad/open-oni.git
+    branch = masca
diff --git a/create_awardee_title.py b/create_awardee_title.py
new file mode 100644
index 0000000..b027351
--- /dev/null
+++ b/create_awardee_title.py
@@ -0,0 +1,16 @@
+import datetime
+from core.models import Title, Language, Country, Awardee
+
+Awardee.objects.get_or_create(org_code="lamasca", name="La Masca")
+Title.objects.all().delete()
+title = Title.objects.create(
+    lccn="sn00000001",  # This needs to be unique across the platform
+    lccn_orig="sn00000001",
+    name="La masca",
+    name_normal="la-masca",
+    start_year="1994",
+    end_year="1994",
+    country=Country.objects.get(code="it"),
+    version=datetime.datetime.now()
+)
+title.languages.set([Language.objects.get(code="ita")])
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..2d49079
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,64 @@
+services:
+  rdbms:
+    image: mariadb:10.6
+    environment:
+      - MYSQL_ROOT_PASSWORD=123456
+      - MYSQL_DATABASE=openoni
+      - MYSQL_USER=openoni
+      - MYSQL_PASSWORD=openoni
+    volumes:
+      - ./conf/mysql/:/etc/mysql/conf.d:Z
+      - data-mariadb:/var/lib/mysql
+    command: ['--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
+  solr:
+    image: solr:8-slim
+    volumes:
+      - data-solr:/var/solr
+    command:
+      - solr-precreate
+      - openoni
+  rais:
+    image: uolibraries/rais:4
+    environment:
+      - RAIS_IIIFWEBPATH=/images/iiif
+      - RAIS_IIIFBASEURL=${ONI_BASE_URL:-http://localhost}
+      - RAIS_TILECACHELEN=250
+      - RAIS_TILEPATH=/opt/openoni/data/batches
+    volumes:
+      # Image files must be available at RAIS_TILEPATH
+      - ./test_batches:/opt/openoni/data/batches:z
+  web:
+    image: ghcr.io/codemyriad/lamasca-open-oni-web:latest
+    volumes:
+      - ./open-oni:/opt/openoni:z
+      - ./create_awardee_title.py:/opt/create_awardee_title.py
+      - ./test_batches:/opt/openoni/data/batches:z
+    depends_on:
+      - rdbms
+    links:
+      - rdbms
+      - solr
+      - rais
+    environment:
+      - APACHE_LOG_LEVEL=${APACHE_LOG_LEVEL:-warn}
+      - ONI_BASE_URL
+      - ONI_CHRONAM_API_THROTTLE
+      - ONI_DB_HOST
+      - ONI_DB_PORT
+      - ONI_DB_NAME
+      - ONI_DB_USER
+      - ONI_DB_PASSWORD
+      - ONI_DEBUG
+      - ONI_HSTS_SECONDS
+      - ONI_IIIF_URL
+      - ONI_LOG_LEVEL
+      - ONI_LOG_SQL
+      - ONI_LOG_TO_FILE
+      - ONI_SECRET_KEY
+      - ONI_SOLR_URL
+    ports:
+      - ${HTTPPORT:-80}:80
+
+volumes:
+  data-mariadb: {}
+  data-solr: {}
diff --git a/how-to-test-alto-xml.md b/how-to-test-alto-xml.md
new file mode 100644
index 0000000..d1228de
--- /dev/null
+++ b/how-to-test-alto-xml.md
@@ -0,0 +1,18 @@
+# How to test an Alto XML file
+
+Set up Open ONI as a submodule. This is needed because the open-oni folder is mounted into the `web` container:
+
+    `git submodule update --init`
+
+Then you should be able to test an Alto XML file with:
+
+    `bash test_alto_batch.sh <path-to-alto-xml>`
+
+The script will look for a .jp2 and a .tif image in the same directory as the Alto XML file, with the same base name.
+
+For example:
+
+    test_alto
+    |-- 0001.xml
+    |-- 0001.jp2
+    |-- 0001.tif
diff --git a/load_alto_batch.sh b/load_alto_batch.sh
new file mode 100644
index 0000000..87bf800
--- /dev/null
+++ b/load_alto_batch.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Check that an input file was provided
+if [ $# -eq 0 ]; then
+    echo "Usage: $0 <alto_xml_file>"
+    exit 1
+fi
+
+# Input Alto XML file
+ALTO_XML_FILE=$(realpath "$1")
+
+# Create temporary batch directory structure
+BATCH_DIR=$(mktemp -d)
+BATCH_NAME=$(basename "${ALTO_XML_FILE%.*}")
+
+# Create Open ONI batch directory structure
+mkdir -p "${BATCH_DIR}/${BATCH_NAME}/alto"
+mkdir -p "${BATCH_DIR}/${BATCH_NAME}/jp2"
+mkdir -p "${BATCH_DIR}/${BATCH_NAME}/tiff"
+
+# Copy Alto XML to batch directory
+cp "$ALTO_XML_FILE" "${BATCH_DIR}/${BATCH_NAME}/alto/"
+
+# Create a dummy mets.xml file (required for an Open ONI batch)
+cat << EOF > "${BATCH_DIR}/${BATCH_NAME}/mets.xml"
+
+
+
+
+
+    ${BATCH_NAME}
+
+
+
+
+EOF
+
+# Create dummy images to satisfy Open ONI requirements
+convert -size 2000x3000 xc:white "${BATCH_DIR}/${BATCH_NAME}/jp2/${BATCH_NAME}_0001.jp2"
+convert -size 2000x3000 xc:white "${BATCH_DIR}/${BATCH_NAME}/tiff/${BATCH_NAME}_0001.tif"
+
+# Path to the Open ONI docker-compose file (adjust as needed)
+DOCKER_COMPOSE_PATH="/path/to/open-oni/docker-compose.yml"
+
+# Ensure the docker-compose file exists
+if [ ! -f "$DOCKER_COMPOSE_PATH" ]; then
+    echo "Docker Compose file not found at $DOCKER_COMPOSE_PATH"
+    exit 1
+fi
+
+# Stop any existing Open ONI containers
+docker-compose -f "$DOCKER_COMPOSE_PATH" down
+
+# Start Docker Compose with the batch mounted
+docker-compose -f "$DOCKER_COMPOSE_PATH" up -d
+
+# Wait for the containers to fully start
+sleep 15
+
+# Load the batch into Open ONI
+docker-compose -f "$DOCKER_COMPOSE_PATH" exec -T web python manage.py load_batch /opt/newspaper_batches/"$BATCH_NAME"
+
+echo "Batch ${BATCH_NAME} loaded successfully!"
diff --git a/open-oni b/open-oni
new file mode 160000
index 0000000..531f30f
--- /dev/null
+++ b/open-oni
@@ -0,0 +1 @@
+Subproject commit 531f30fe0e55338fd279b987de398e12c4bf417b
diff --git a/test_alto_batch.sh b/test_alto_batch.sh
new file mode 100644
index 0000000..28bbbc7
--- /dev/null
+++ b/test_alto_batch.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+set -e
+
+# Check that an input file was provided
+if [ $# -eq 0 ]; then
+    echo "Usage: $0 <alto_xml_file>"
+    exit 1
+fi
+
+# Input Alto XML file
+ALTO_XML_FILE=$(realpath "$1")
+ALTO_XML_FILENAME=$(basename "${ALTO_XML_FILE%.*}")
+
+# Create the test batch directory structure
+BATCH_DIR=./test_batches
+BATCH_NAME="batch_lamasca_${ALTO_XML_FILENAME}_ver01"
+BATCH_DATE=1994011201
+
+echo "Creating test batch in ${BATCH_DIR}/${BATCH_NAME}"
+
+# Create Open ONI batch directory structure
+mkdir -p "${BATCH_DIR}/${BATCH_NAME}/data/sn00000001/001/${BATCH_DATE}"
+
+# Copy the Alto XML and its companion images (same directory, same base name) into the batch
+cp "$ALTO_XML_FILE" "${BATCH_DIR}/${BATCH_NAME}/data/sn00000001/001/${BATCH_DATE}/0001.xml"
+cp "${ALTO_XML_FILE%.*}.jp2" "${BATCH_DIR}/${BATCH_NAME}/data/sn00000001/001/${BATCH_DATE}/0001.jp2"
+cp "${ALTO_XML_FILE%.*}.tif" "${BATCH_DIR}/${BATCH_NAME}/data/sn00000001/001/${BATCH_DATE}/0001.tif"
+
+# Create a dummy batch.xml file (required for an Open ONI batch)
+cat << EOF > "${BATCH_DIR}/${BATCH_NAME}/data/batch.xml"
+
+
+    ./sn00000001/001/${BATCH_DATE}/${BATCH_DATE}.xml
+
+EOF
+
+# Copy the default METS file
+cp test_batches/default_mets.xml "${BATCH_DIR}/${BATCH_NAME}/data/sn00000001/001/${BATCH_DATE}/${BATCH_DATE}.xml"
+
+# Path to the Open ONI docker-compose file (adjust as needed)
+DOCKER_COMPOSE_PATH="./docker-compose.yml"
+
+# Ensure the docker-compose file exists
+if [ ! -f "$DOCKER_COMPOSE_PATH" ]; then
+    echo "Docker Compose file not found at $DOCKER_COMPOSE_PATH"
+    exit 1
+fi
+
+# Stop any existing Open ONI containers
+docker compose -f "$DOCKER_COMPOSE_PATH" down
+
+# Start Docker Compose with the batch mounted
+docker compose -f "$DOCKER_COMPOSE_PATH" up -d
+
+# Wait for the containers to fully start
+sleep 15
+
+# Create the Awardee and Title objects if they don't exist
+docker compose -f "$DOCKER_COMPOSE_PATH" exec -T web bash -c "source ENV/bin/activate && python manage.py shell < /opt/create_awardee_title.py"
+
+# Purge the batch if it already exists in Open ONI
+docker compose -f "$DOCKER_COMPOSE_PATH" exec -T web bash -c "source ENV/bin/activate && python manage.py purge_batch $BATCH_NAME"
+
+# Load the batch into Open ONI
+docker compose -f "$DOCKER_COMPOSE_PATH" exec -T web bash -c "source ENV/bin/activate && python manage.py load_batch /opt/openoni/data/batches/$BATCH_NAME"
+
+docker compose -f "$DOCKER_COMPOSE_PATH" exec -T web bash -c "curl -s http://localhost/lccn/sn00000001/1994-01-12/ed-1/seq-1/"
+
+# Change permissions for the Django cache
+docker compose -f "$DOCKER_COMPOSE_PATH" exec -T web bash -c "chown -R www-data:www-data /var/tmp/django_cache"
+
+echo "Batch ${BATCH_NAME} loaded successfully!"
+
+echo "You should be able to see your page at http://localhost/lccn/sn00000001/1994-01-12/ed-1/seq-1/"
+echo "Useful links:"
+echo "OCR: http://localhost/lccn/sn00000001/1994-01-12/ed-1/seq-1/ocr/"
+echo "Text coordinates: http://localhost/lccn/sn00000001/1994-01-12/ed-1/seq-1/coordinates/"
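As how-to-test-alto-xml.md notes, test_alto_batch.sh expects a .jp2 and a .tif next to the ALTO XML with the same base name. If you only have the XML, a minimal sketch for producing blank placeholder images with ImageMagick, mirroring the `convert` calls already used in load_alto_batch.sh (the 2000x3000 size and the test_alto/0001 name are just examples, and your ImageMagick build is assumed to have JPEG-2000 support; blank pages are enough to load the batch but will not match the ALTO coordinates visually):

    # Create blank companion images next to an ALTO file, e.g. test_alto/0001.xml
    convert -size 2000x3000 xc:white test_alto/0001.jp2
    convert -size 2000x3000 xc:white test_alto/0001.tif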
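Once test_alto_batch.sh reports success, one way to spot-check the result from the host is to fetch the URLs the script prints, assuming the default HTTPPORT of 80 (adjust the host and port if you changed ONI_BASE_URL or HTTPPORT):

    # Page, OCR text, and word coordinates for the loaded issue
    curl -s http://localhost/lccn/sn00000001/1994-01-12/ed-1/seq-1/ | head
    curl -s http://localhost/lccn/sn00000001/1994-01-12/ed-1/seq-1/ocr/ | head
    curl -s http://localhost/lccn/sn00000001/1994-01-12/ed-1/seq-1/coordinates/ | head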