Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 1 addition & 32 deletions app/jobs/generate_delta_dump_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ def perform(stream, from_date: nil, effective_date: Time.zone.now, publish: true
normalized_dump = delta_dump.build_normalized_dump(stream: stream)

base_name = "#{stream.organization.slug}#{"-#{stream.slug}" unless stream.default?}-#{effective_date.strftime('%FT%T')}-delta"
writer = MarcRecordWriterService.new(base_name)
oai_writer = ChunkedOaiMarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date)
writer = MarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date)

job_tracker.update(total: uploads.sum(&:marc_records_count))

Expand All @@ -43,23 +42,15 @@ def perform(stream, from_date: nil, effective_date: Time.zone.now, publish: true
records.each do |record|
if record.status == 'delete'
writer.write_delete(record)
oai_writer.write_delete(record)
else
writer.write_marc_record(record)
oai_writer.write_marc_record(record)
end
end

job_tracker.increment(records.size)
end

writer.finalize
oai_writer.finalize

writer.files.each do |as, file|
normalized_dump.public_send(as).attach(io: File.open(file),
filename: human_readable_filename(base_name, as))
end

normalized_dump.update(published_at: effective_date)
delta_dump.published_at = Time.zone.now if publish
Expand All @@ -68,29 +59,7 @@ def perform(stream, from_date: nil, effective_date: Time.zone.now, publish: true
ensure
writer.close
writer.unlink

oai_writer.close
oai_writer.unlink
end
end
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

private

def human_readable_filename(base_name, file_type, counter = nil)
as = case file_type
when :deletes
'deletes.del.txt'
when :marc21
'marc21.mrc.gz'
when :marcxml
'marcxml.xml.gz'
when :oai_xml
"oai-#{format('%010d', counter)}.xml.gz"
else
"#{file_type}.gz"
end

"#{base_name}-#{as}"
end
end
31 changes: 3 additions & 28 deletions app/jobs/generate_full_dump_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def self.enqueue_some(older_than: 4.weeks, maximum: 2)
end
# rubocop:enable Metrics/AbcSize,Metrics/CyclomaticComplexity

# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity
def perform(stream, effective_date: Time.zone.now, publish: true)
uploads = stream.uploads.active

Expand All @@ -49,8 +49,7 @@ def perform(stream, effective_date: Time.zone.now, publish: true)
normalized_dump = full_dump.build_normalized_dump(stream: stream)

base_name = "#{stream.organization.slug}#{"-#{stream.slug}" unless stream.default?}-#{Time.zone.today}-full"
writer = MarcRecordWriterService.new(base_name)
oai_writer = ChunkedOaiMarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date)
writer = MarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date)

begin
NormalizedMarcRecordReader.new(uploads).each_slice(1000) do |records|
Expand All @@ -63,19 +62,13 @@ def perform(stream, effective_date: Time.zone.now, publish: true)
next if record.status == 'delete'

writer.write_marc_record(record)
oai_writer.write_marc_record(record)
end

job_tracker.increment(records.size)
end

oai_writer.finalize
writer.finalize

writer.files.each do |as, file|
normalized_dump.public_send(as).attach(io: File.open(file), filename: human_readable_filename(base_name, as))
end

normalized_dump.update(published_at: effective_date)
full_dump.published_at = Time.zone.now if publish

Expand All @@ -85,25 +78,7 @@ def perform(stream, effective_date: Time.zone.now, publish: true)
ensure
writer.close
writer.unlink

oai_writer.close
oai_writer.unlink
end
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

def human_readable_filename(base_name, file_type)
as = case file_type
when :deletes
'deletes.del.txt'
when :marc21
'marc21.mrc.gz'
when :marcxml
'marcxml.xml.gz'
else
"#{file_type}.gz"
end

"#{base_name}-#{as}"
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity
end
15 changes: 1 addition & 14 deletions app/jobs/generate_interstream_delta_dump_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ def perform(previous_stream, stream, effective_date: Time.zone.now, publish: tru
normalized_dump = interstream_delta.build_normalized_dump(stream: stream)

base_name = "#{stream.organization.slug}-#{stream.slug}-#{Time.zone.today}-interstream-delta-#{previous_stream.slug}"
writer = MarcRecordWriterService.new(base_name)
oai_writer = ChunkedOaiMarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date)
writer = MarcRecordWriterService.new(base_name, dump: normalized_dump, now: effective_date)

job_tracker.update(total: previous_uploads.sum(&:marc_records_count))

Expand All @@ -39,7 +38,6 @@ def perform(previous_stream, stream, effective_date: Time.zone.now, publish: tru

deleted_records.each do |marc001|
writer.write_delete(previous_records_by_marc001[marc001])
oai_writer.write_delete(previous_records_by_marc001[marc001])
end

new_records = current_records.reject do |record|
Expand All @@ -51,30 +49,19 @@ def perform(previous_stream, stream, effective_date: Time.zone.now, publish: tru
next if new_record.status == 'delete'

writer.write_marc_record(new_record)
oai_writer.write_marc_record(new_record)
end

job_tracker.increment(previous_records.size)
end

writer.finalize
oai_writer.finalize

writer.files.each do |as, file|
normalized_dump.public_send(as).attach(io: File.open(file),
filename: human_readable_filename(base_name, as))
end

normalized_dump.update(published_at: effective_date)
interstream_delta.published_at = Time.zone.now if publish

interstream_delta.save!
ensure
writer.close
writer.unlink

oai_writer.close
oai_writer.unlink
end

private
Expand Down
44 changes: 41 additions & 3 deletions app/services/marc_record_writer_service.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# frozen_string_literal: true

# Utility class for serializing MARC records to files
class MarcRecordWriterService
attr_reader :base_name, :files
class MarcRecordWriterService # rubocop:disable Metrics/ClassLength
attr_reader :base_name, :files, :dump, :now

def initialize(base_name = nil, files: { errata: nil, marcxml: nil, marc21: nil, deletes: nil })
def initialize(base_name = nil, dump:, now: Time.zone.now, files: { errata: nil, marcxml: nil, marc21: nil, deletes: nil })
@base_name = base_name
@dump = dump
@now = now
@files = files.compact
@opened_files = []
@writers = {}
Expand All @@ -16,32 +18,49 @@ def write_marc_record(record)

write_marc21_record(record)
write_marcxml_record(record)
write_oai_record(record)
end

def write_delete(record)
deletes_writer.puts(record.marc001)
oai_writer.write_delete(record)
end

def write_errata(message)
writer(:errata).puts(message)
end

def finalize
oai_writer.finalize

@writers.each_value(&:close)

files.each do |as, file|
dump.public_send(as).attach(io: File.open(file), filename: human_readable_filename(base_name, as))
end
end

def close
@opened_files.each(&:close)

oai_writer.close
end

def unlink
@opened_files.each(&:unlink)
oai_writer.unlink

FileUtils.rm_rf tempdir
end

private

def write_oai_record(record)
oai_writer.write_marc_record(record)
rescue StandardError => e
write_errata("#{record.marc001}: #{e}")
end

def write_marc21_record(record)
writer(:marc21).write(split_marc(record.augmented_marc))
rescue StandardError => e
Expand Down Expand Up @@ -70,6 +89,10 @@ def deletes_writer
@writers[:deletes] ||= file(:deletes)
end

def oai_writer
@writers[:oai] ||= ChunkedOaiMarcRecordWriterService.new(base_name, dump: dump, now: now)
end

def gzipped_temp_file(name)
Zlib::GzipWriter.new(temp_file(name))
end
Expand Down Expand Up @@ -102,4 +125,19 @@ def valid?(record)
def tempdir
@tempdir ||= Dir.mktmpdir(base_name || 'marc_record_writer', Settings.marc_record_writer_tmpdir || Dir.tmpdir)
end

def human_readable_filename(base_name, file_type)
as = case file_type
when :deletes
'deletes.del.txt'
when :marc21
'marc21.mrc.gz'
when :marcxml
'marcxml.xml.gz'
else
"#{file_type}.gz"
end

"#{base_name}-#{as}"
end
end