diff --git a/app/jobs/generate_delta_dump_job.rb b/app/jobs/generate_delta_dump_job.rb index bebe74af..41d93d7d 100644 --- a/app/jobs/generate_delta_dump_job.rb +++ b/app/jobs/generate_delta_dump_job.rb @@ -29,8 +29,7 @@ def perform(stream, from_date: nil, effective_date: Time.zone.now, publish: true normalized_dump = delta_dump.build_normalized_dump(stream: stream) base_name = "#{stream.organization.slug}#{"-#{stream.slug}" unless stream.default?}-#{effective_date.strftime('%FT%T')}-delta" - writer = MarcRecordWriterService.new(base_name) - oai_writer = ChunkedOaiMarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date) + writer = MarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date) job_tracker.update(total: uploads.sum(&:marc_records_count)) @@ -43,10 +42,8 @@ def perform(stream, from_date: nil, effective_date: Time.zone.now, publish: true records.each do |record| if record.status == 'delete' writer.write_delete(record) - oai_writer.write_delete(record) else writer.write_marc_record(record) - oai_writer.write_marc_record(record) end end @@ -54,12 +51,6 @@ def perform(stream, from_date: nil, effective_date: Time.zone.now, publish: true end writer.finalize - oai_writer.finalize - - writer.files.each do |as, file| - normalized_dump.public_send(as).attach(io: File.open(file), - filename: human_readable_filename(base_name, as)) - end normalized_dump.update(published_at: effective_date) delta_dump.published_at = Time.zone.now if publish @@ -68,29 +59,7 @@ def perform(stream, from_date: nil, effective_date: Time.zone.now, publish: true ensure writer.close writer.unlink - - oai_writer.close - oai_writer.unlink end end # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity - - private - - def human_readable_filename(base_name, file_type, counter = nil) - as = case file_type - when :deletes - 'deletes.del.txt' - when :marc21 - 'marc21.mrc.gz' - when :marcxml - 'marcxml.xml.gz' - when :oai_xml - "oai-#{format('%010d', counter)}.xml.gz" - else - "#{file_type}.gz" - end - - "#{base_name}-#{as}" - end end diff --git a/app/jobs/generate_full_dump_job.rb b/app/jobs/generate_full_dump_job.rb index 144fe8c8..05217a13 100644 --- a/app/jobs/generate_full_dump_job.rb +++ b/app/jobs/generate_full_dump_job.rb @@ -33,7 +33,7 @@ def self.enqueue_some(older_than: 4.weeks, maximum: 2) end # rubocop:enable Metrics/AbcSize,Metrics/CyclomaticComplexity - # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity + # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity def perform(stream, effective_date: Time.zone.now, publish: true) uploads = stream.uploads.active @@ -49,8 +49,7 @@ def perform(stream, effective_date: Time.zone.now, publish: true) normalized_dump = full_dump.build_normalized_dump(stream: stream) base_name = "#{stream.organization.slug}#{"-#{stream.slug}" unless stream.default?}-#{Time.zone.today}-full" - writer = MarcRecordWriterService.new(base_name) - oai_writer = ChunkedOaiMarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date) + writer = MarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date) begin NormalizedMarcRecordReader.new(uploads).each_slice(1000) do |records| @@ -63,19 +62,13 @@ def perform(stream, effective_date: Time.zone.now, publish: true) next if record.status == 'delete' writer.write_marc_record(record) - oai_writer.write_marc_record(record) end job_tracker.increment(records.size) end - oai_writer.finalize writer.finalize - writer.files.each do |as, file| - normalized_dump.public_send(as).attach(io: File.open(file), filename: human_readable_filename(base_name, as)) - end - normalized_dump.update(published_at: effective_date) full_dump.published_at = Time.zone.now if publish @@ -85,25 +78,7 @@ def perform(stream, effective_date: Time.zone.now, publish: true) ensure writer.close writer.unlink - - oai_writer.close - oai_writer.unlink end end - # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity - - def human_readable_filename(base_name, file_type) - as = case file_type - when :deletes - 'deletes.del.txt' - when :marc21 - 'marc21.mrc.gz' - when :marcxml - 'marcxml.xml.gz' - else - "#{file_type}.gz" - end - - "#{base_name}-#{as}" - end + # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity end diff --git a/app/jobs/generate_interstream_delta_dump_job.rb b/app/jobs/generate_interstream_delta_dump_job.rb index 0cb29a27..64df480c 100644 --- a/app/jobs/generate_interstream_delta_dump_job.rb +++ b/app/jobs/generate_interstream_delta_dump_job.rb @@ -25,8 +25,7 @@ def perform(previous_stream, stream, effective_date: Time.zone.now, publish: tru normalized_dump = interstream_delta.build_normalized_dump(stream: stream) base_name = "#{stream.organization.slug}-#{stream.slug}-#{Time.zone.today}-interstream-delta-#{previous_stream.slug}" - writer = MarcRecordWriterService.new(base_name) - oai_writer = ChunkedOaiMarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date) + writer = MarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date) job_tracker.update(total: previous_uploads.sum(&:marc_records_count)) @@ -39,7 +38,6 @@ def perform(previous_stream, stream, effective_date: Time.zone.now, publish: tru deleted_records.each do |marc001| writer.write_delete(previous_records_by_marc001[marc001]) - oai_writer.write_delete(previous_records_by_marc001[marc001]) end new_records = current_records.reject do |record| @@ -51,20 +49,12 @@ def perform(previous_stream, stream, effective_date: Time.zone.now, publish: tru next if new_record.status == 'delete' writer.write_marc_record(new_record) - oai_writer.write_marc_record(new_record) end job_tracker.increment(previous_records.size) end writer.finalize - oai_writer.finalize - - writer.files.each do |as, file| - normalized_dump.public_send(as).attach(io: File.open(file), - filename: human_readable_filename(base_name, as)) - end - normalized_dump.update(published_at: effective_date) interstream_delta.published_at = Time.zone.now if publish @@ -72,9 +62,6 @@ def perform(previous_stream, stream, effective_date: Time.zone.now, publish: tru ensure writer.close writer.unlink - - oai_writer.close - oai_writer.unlink end private diff --git a/app/services/marc_record_writer_service.rb b/app/services/marc_record_writer_service.rb index 42ddc48c..bf8e2f67 100644 --- a/app/services/marc_record_writer_service.rb +++ b/app/services/marc_record_writer_service.rb @@ -1,11 +1,13 @@ # frozen_string_literal: true # Utility class for serializing MARC records to files -class MarcRecordWriterService - attr_reader :base_name, :files +class MarcRecordWriterService # rubocop:disable Metrics/ClassLength + attr_reader :base_name, :files, :dump, :now - def initialize(base_name = nil, files: { errata: nil, marcxml: nil, marc21: nil, deletes: nil }) + def initialize(base_name = nil, dump:, now: Time.zone.now, files: { errata: nil, marcxml: nil, marc21: nil, deletes: nil }) @base_name = base_name + @dump = dump + @now = now @files = files.compact @opened_files = [] @writers = {} @@ -16,10 +18,12 @@ def write_marc_record(record) write_marc21_record(record) write_marcxml_record(record) + write_oai_record(record) end def write_delete(record) deletes_writer.puts(record.marc001) + oai_writer.write_delete(record) end def write_errata(message) @@ -27,21 +31,36 @@ def write_errata(message) end def finalize + oai_writer.finalize + @writers.each_value(&:close) + + files.each do |as, file| + dump.public_send(as).attach(io: File.open(file), filename: human_readable_filename(base_name, as)) + end end def close @opened_files.each(&:close) + + oai_writer.close end def unlink @opened_files.each(&:unlink) + oai_writer.unlink FileUtils.rm_rf tempdir end private + def write_oai_record(record) + oai_writer.write_marc_record(record) + rescue StandardError => e + write_errata("#{record.marc001}: #{e}") + end + def write_marc21_record(record) writer(:marc21).write(split_marc(record.augmented_marc)) rescue StandardError => e @@ -70,6 +89,10 @@ def deletes_writer @writers[:deletes] ||= file(:deletes) end + def oai_writer + @writers[:oai] ||= ChunkedOaiMarcRecordWriterService.new(base_name, dump: dump, now: now) + end + def gzipped_temp_file(name) Zlib::GzipWriter.new(temp_file(name)) end @@ -102,4 +125,19 @@ def valid?(record) def tempdir @tempdir ||= Dir.mktmpdir(base_name || 'marc_record_writer', Settings.marc_record_writer_tmpdir || Dir.tmpdir) end + + def human_readable_filename(base_name, file_type) + as = case file_type + when :deletes + 'deletes.del.txt' + when :marc21 + 'marc21.mrc.gz' + when :marcxml + 'marcxml.xml.gz' + else + "#{file_type}.gz" + end + + "#{base_name}-#{as}" + end end