From 0ac5084efdd0439aa5ea774c7d76dc4340b14718 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Thu, 26 Feb 2026 18:17:09 +0100 Subject: [PATCH 01/16] add paper_trail cleanup rake task --- app/models/fmu.rb | 2 +- app/models/operator.rb | 2 +- lib/tasks/paper_trail.rake | 95 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 lib/tasks/paper_trail.rake diff --git a/app/models/fmu.rb b/app/models/fmu.rb index 3030da66..f28fbd07 100644 --- a/app/models/fmu.rb +++ b/app/models/fmu.rb @@ -24,7 +24,7 @@ # class Fmu < ApplicationRecord - has_paper_trail + has_paper_trail ignore: %i[geometry geojson] acts_as_paranoid include EsriShapefileUpload diff --git a/app/models/operator.rb b/app/models/operator.rb index 4e0fe4d9..f2853f54 100644 --- a/app/models/operator.rb +++ b/app/models/operator.rb @@ -25,7 +25,7 @@ # class Operator < ApplicationRecord - has_paper_trail + has_paper_trail ignore: %i[country_doc_rank country_operators] mount_base64_uploader :logo, LogoUploader attr_accessor :delete_logo diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake new file mode 100644 index 00000000..7cd125b4 --- /dev/null +++ b/lib/tasks/paper_trail.rake @@ -0,0 +1,95 @@ +namespace :paper_trail do + desc "Clean Operator versions - delete where only ignored fields changed, strip those fields from object_changes in the rest. Set FOR_REAL=true to apply." + task clean_operators: :environment do + for_real = ENV["FOR_REAL"] == "true" + + puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" + + stripped_fields = %w[ + percentage_valid_documents_all + percentage_valid_documents_fmu + percentage_valid_documents_country + country_doc_rank + country_operators + ] + ignored_fields = stripped_fields + %w[updated_at] + ids_to_delete = [] + ids_to_strip = [] + + PaperTrail::Version.where(item_type: "Operator", event: "update").find_each do |version| + next if version.object_changes.blank? + + changed_keys = version.changeset.keys + next unless (changed_keys & stripped_fields).any? + + if (changed_keys - ignored_fields).empty? + ids_to_delete << version.id + else + ids_to_strip << version.id + end + end + + puts "Found #{ids_to_delete.size} versions to delete (only ignored fields changed)." + puts "Found #{ids_to_strip.size} versions to strip ignored fields from." + + if for_real + if ids_to_delete.any? + PaperTrail::Version.where(id: ids_to_delete).delete_all + puts "Deleted #{ids_to_delete.size} versions." + end + + if ids_to_strip.any? + PaperTrail::Version.where(id: ids_to_strip).find_each do |version| + changes = version.changeset + stripped_fields.each { |f| changes.delete(f) } + version.update_column(:object_changes, PaperTrail.serializer.dump(changes)) + end + puts "Stripped ignored fields from #{ids_to_strip.size} versions." + end + end + end + + desc "Clean Fmu versions - delete where only ignored fields changed, strip those fields from object_changes in the rest. Set FOR_REAL=true to apply." + task clean_fmus: :environment do + for_real = ENV["FOR_REAL"] == "true" + + puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" + + stripped_fields = %w[geometry geojson] + ignored_fields = stripped_fields + %w[updated_at] + ids_to_delete = [] + ids_to_strip = [] + + PaperTrail::Version.where(item_type: "Fmu", event: "update").find_each do |version| + next if version.object_changes.blank? + + changes = YAML.unsafe_load(version.object_changes) + next unless (changes.keys & stripped_fields).any? + + if (changes.keys - ignored_fields).empty? + ids_to_delete << version.id + else + ids_to_strip << version.id + end + end + + puts "Found #{ids_to_delete.size} versions to delete (only ignored fields changed)." + puts "Found #{ids_to_strip.size} versions to strip ignored fields from." + + if for_real + if ids_to_delete.any? + PaperTrail::Version.where(id: ids_to_delete).delete_all + puts "Deleted #{ids_to_delete.size} versions." + end + + if ids_to_strip.any? + PaperTrail::Version.where(id: ids_to_strip).find_each do |version| + changes = YAML.unsafe_load(version.object_changes) + stripped_fields.each { |f| changes.delete(f) } + version.update_column(:object_changes, PaperTrail.serializer.dump(changes)) + end + puts "Stripped ignored fields from #{ids_to_strip.size} versions." + end + end + end +end From 03f9b7ae400b551d4807f820b5e0ab3c1640acfb Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Thu, 26 Feb 2026 19:17:28 +0100 Subject: [PATCH 02/16] refactor paper trail rake task --- lib/tasks/paper_trail.rake | 101 +++++++++++++------------------------ 1 file changed, 36 insertions(+), 65 deletions(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index 7cd125b4..dc8e5488 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -1,94 +1,65 @@ -namespace :paper_trail do - desc "Clean Operator versions - delete where only ignored fields changed, strip those fields from object_changes in the rest. Set FOR_REAL=true to apply." - task clean_operators: :environment do - for_real = ENV["FOR_REAL"] == "true" - - puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" - - stripped_fields = %w[ +PAPER_TRAIL_CLEAN_CONFIG = [ + { + item_type: "Operator", + stripped_fields: %w[ percentage_valid_documents_all percentage_valid_documents_fmu percentage_valid_documents_country country_doc_rank country_operators ] - ignored_fields = stripped_fields + %w[updated_at] - ids_to_delete = [] - ids_to_strip = [] - - PaperTrail::Version.where(item_type: "Operator", event: "update").find_each do |version| - next if version.object_changes.blank? - - changed_keys = version.changeset.keys - next unless (changed_keys & stripped_fields).any? - - if (changed_keys - ignored_fields).empty? - ids_to_delete << version.id - else - ids_to_strip << version.id - end - end - - puts "Found #{ids_to_delete.size} versions to delete (only ignored fields changed)." - puts "Found #{ids_to_strip.size} versions to strip ignored fields from." - - if for_real - if ids_to_delete.any? - PaperTrail::Version.where(id: ids_to_delete).delete_all - puts "Deleted #{ids_to_delete.size} versions." - end + }, + { + item_type: "Fmu", + stripped_fields: %w[geometry geojson] + } +].freeze - if ids_to_strip.any? - PaperTrail::Version.where(id: ids_to_strip).find_each do |version| - changes = version.changeset - stripped_fields.each { |f| changes.delete(f) } - version.update_column(:object_changes, PaperTrail.serializer.dump(changes)) - end - puts "Stripped ignored fields from #{ids_to_strip.size} versions." - end - end - end - - desc "Clean Fmu versions - delete where only ignored fields changed, strip those fields from object_changes in the rest. Set FOR_REAL=true to apply." - task clean_fmus: :environment do +namespace :paper_trail do + desc "Clean versions for all models - delete where only ignored fields changed, strip those fields from the rest. Set FOR_REAL=true to apply." + task clean_up: :environment do for_real = ENV["FOR_REAL"] == "true" puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" - stripped_fields = %w[geometry geojson] - ignored_fields = stripped_fields + %w[updated_at] - ids_to_delete = [] - ids_to_strip = [] + PAPER_TRAIL_CLEAN_CONFIG.each do |config| + item_type = config[:item_type] + stripped_fields = config[:stripped_fields] + ignored_fields = stripped_fields + %w[updated_at] + ids_to_delete = [] + ids_to_strip = [] - PaperTrail::Version.where(item_type: "Fmu", event: "update").find_each do |version| - next if version.object_changes.blank? + PaperTrail::Version.where(item_type: item_type, event: "update").find_each do |version| + next if version.object_changes.blank? - changes = YAML.unsafe_load(version.object_changes) - next unless (changes.keys & stripped_fields).any? + changes = version.changeset + next unless (changes.keys & stripped_fields).any? - if (changes.keys - ignored_fields).empty? - ids_to_delete << version.id - else - ids_to_strip << version.id + if (changes.keys - ignored_fields).empty? + ids_to_delete << version.id + else + ids_to_strip << version.id + end end - end - puts "Found #{ids_to_delete.size} versions to delete (only ignored fields changed)." - puts "Found #{ids_to_strip.size} versions to strip ignored fields from." + puts "\n#{item_type}:" + puts " #{ids_to_delete.size} versions to delete (only ignored fields changed)" + puts " #{ids_to_strip.size} versions to strip ignored fields from" + + next unless for_real - if for_real if ids_to_delete.any? PaperTrail::Version.where(id: ids_to_delete).delete_all - puts "Deleted #{ids_to_delete.size} versions." + puts " Deleted." end if ids_to_strip.any? PaperTrail::Version.where(id: ids_to_strip).find_each do |version| - changes = YAML.unsafe_load(version.object_changes) + changes = version.changeset stripped_fields.each { |f| changes.delete(f) } version.update_column(:object_changes, PaperTrail.serializer.dump(changes)) end - puts "Stripped ignored fields from #{ids_to_strip.size} versions." + puts " Stripped." end end end From bc9b26a4d5331231fd2344a84200c696911832a8 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Fri, 27 Feb 2026 09:23:06 +0100 Subject: [PATCH 03/16] paper_trail cleanup: add deduplicate task --- lib/tasks/paper_trail.rake | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index dc8e5488..b7909f3d 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -63,4 +63,37 @@ namespace :paper_trail do end end end + + desc "Remove duplicate versions with identical object_changes created within 3 seconds of each other. Set FOR_REAL=true to apply." + task deduplicate: :environment do + for_real = ENV["FOR_REAL"] == "true" + + puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" + + ids_to_delete = ActiveRecord::Base.connection.select_values(<<~SQL) + WITH ranked AS ( + SELECT id, + ROW_NUMBER() OVER ( + PARTITION BY item_type, item_id, object_changes + ORDER BY id + ) AS rn, + created_at - FIRST_VALUE(created_at) OVER ( + PARTITION BY item_type, item_id, object_changes + ORDER BY id + ) AS age_within_group + FROM versions + WHERE event = 'update' + ) + SELECT id FROM ranked + WHERE rn > 1 + AND EXTRACT(EPOCH FROM age_within_group) <= 3 + SQL + + puts "Found #{ids_to_delete.size} duplicate versions." + + if for_real && ids_to_delete.any? + PaperTrail::Version.where(id: ids_to_delete).delete_all + puts "Deleted." + end + end end From 7e5d32ed6614efa66c762d16274ed2dd6f9f93ef Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Fri, 27 Feb 2026 15:06:08 +0100 Subject: [PATCH 04/16] refactor deduplicate --- lib/tasks/paper_trail.rake | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index b7909f3d..bc7f053b 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -7,6 +7,8 @@ PAPER_TRAIL_CLEAN_CONFIG = [ percentage_valid_documents_country country_doc_rank country_operators + score_absolute + obs_per_visit ] }, { @@ -71,22 +73,18 @@ namespace :paper_trail do puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" ids_to_delete = ActiveRecord::Base.connection.select_values(<<~SQL) - WITH ranked AS ( + WITH lagged AS ( SELECT id, - ROW_NUMBER() OVER ( - PARTITION BY item_type, item_id, object_changes - ORDER BY id - ) AS rn, - created_at - FIRST_VALUE(created_at) OVER ( - PARTITION BY item_type, item_id, object_changes - ORDER BY id - ) AS age_within_group + object_changes, + created_at, + LAG(object_changes) OVER (PARTITION BY item_type, item_id ORDER BY created_at, id) AS prev_object_changes, + LAG(created_at) OVER (PARTITION BY item_type, item_id ORDER BY created_at, id) AS prev_created_at FROM versions WHERE event = 'update' ) - SELECT id FROM ranked - WHERE rn > 1 - AND EXTRACT(EPOCH FROM age_within_group) <= 3 + SELECT id FROM lagged + WHERE object_changes = prev_object_changes + AND EXTRACT(EPOCH FROM (created_at - prev_created_at)) <= 3 SQL puts "Found #{ids_to_delete.size} duplicate versions." From 1eaa06c03d88eff24c53a831c59e7ac3e74722a7 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Wed, 4 Mar 2026 12:02:18 +0100 Subject: [PATCH 05/16] update geojson properties that won't come up in paper trail history --- app/models/fmu.rb | 16 +++++++++------- app/models/observation.rb | 2 +- app/models/operator.rb | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/app/models/fmu.rb b/app/models/fmu.rb index f28fbd07..384a4148 100644 --- a/app/models/fmu.rb +++ b/app/models/fmu.rb @@ -24,7 +24,7 @@ # class Fmu < ApplicationRecord - has_paper_trail ignore: %i[geometry geojson] + has_paper_trail skip: %i[geometry] acts_as_paranoid include EsriShapefileUpload @@ -46,13 +46,12 @@ class Fmu < ApplicationRecord accepts_nested_attributes_for :operators accepts_nested_attributes_for :fmu_operator, reject_if: proc { |attributes| attributes["operator_id"].blank? } - before_validation :update_geojson_properties - validates :name, presence: true validates :forest_type, presence: true validates :geojson, geojson: true, if: :geojson_changed? after_save :update_geometry, if: :saved_change_to_geojson? + after_save :update_geojson_properties # TODO Redo all of those scope :filter_by_countries, ->(country_ids) { where(country_id: country_ids.split(",")) } @@ -82,14 +81,12 @@ def fetch_all(options) end def update_geojson_properties - return if geojson.blank? - fmu_type_label = begin ForestType::TYPES[forest_type.to_sym][:geojson_label] rescue "" end - geojson["properties"] = (geojson["properties"] || {}).merge({ + new_properties = { "id" => id, "fmu_name" => name, "iso3_fmu" => country&.iso, @@ -106,7 +103,12 @@ def update_geojson_properties "observations" => active_observations.reload.uniq.count, "forest_type" => forest_type, "fmu_type_label" => fmu_type_label # old one deprecated, to be removed in the future - }) + } + # we want to omit tracking those changes in properties in the paper trail + # we only will track changes to geojson->geometry via upload + self.class.unscoped.where(id: id).where.not(geojson: nil).update_all( + ["geojson = jsonb_set(geojson, '{properties}', COALESCE(geojson->'properties', '{}') || ?::jsonb, true)", new_properties.to_json] + ) end def properties diff --git a/app/models/observation.rb b/app/models/observation.rb index 57a200a9..9c351eb5 100644 --- a/app/models/observation.rb +++ b/app/models/observation.rb @@ -401,7 +401,7 @@ def update_fmu_geojson return unless fmu_id fmu.update_geojson_properties - fmu.save + fmu.touch # to update cache end def assign_observers_from_report diff --git a/app/models/operator.rb b/app/models/operator.rb index f2853f54..150c0b86 100644 --- a/app/models/operator.rb +++ b/app/models/operator.rb @@ -25,7 +25,7 @@ # class Operator < ApplicationRecord - has_paper_trail ignore: %i[country_doc_rank country_operators] + has_paper_trail skip: %i[country_doc_rank country_operators] mount_base64_uploader :logo, LogoUploader attr_accessor :delete_logo From 69c79f9cd4a4bccb29156f71f5e734d659dd7284 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Wed, 4 Mar 2026 12:33:47 +0100 Subject: [PATCH 06/16] add item_type for clean_up tasks --- lib/tasks/paper_trail.rake | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index bc7f053b..61307733 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -9,35 +9,40 @@ PAPER_TRAIL_CLEAN_CONFIG = [ country_operators score_absolute obs_per_visit + updated_at ] }, { item_type: "Fmu", - stripped_fields: %w[geometry geojson] + stripped_fields: %w[geometry updated_at] } ].freeze namespace :paper_trail do - desc "Clean versions for all models - delete where only ignored fields changed, strip those fields from the rest. Set FOR_REAL=true to apply." + desc "Clean versions for all models - delete where only ignored fields changed, strip those fields from the rest. Set FOR_REAL=true to apply. Optionally filter with ITEM_TYPE=Foo." task clean_up: :environment do for_real = ENV["FOR_REAL"] == "true" + filter_item_type = ENV["ITEM_TYPE"] puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" + puts "Filtering to item_type=#{filter_item_type}" if filter_item_type - PAPER_TRAIL_CLEAN_CONFIG.each do |config| + configs = PAPER_TRAIL_CLEAN_CONFIG + configs = configs.select { |c| c[:item_type] == filter_item_type } if filter_item_type + + configs.each do |config| item_type = config[:item_type] stripped_fields = config[:stripped_fields] - ignored_fields = stripped_fields + %w[updated_at] ids_to_delete = [] ids_to_strip = [] - PaperTrail::Version.where(item_type: item_type, event: "update").find_each do |version| + PaperTrail::Version.where(event: "update", item_type: item_type).find_each do |version| next if version.object_changes.blank? changes = version.changeset next unless (changes.keys & stripped_fields).any? - if (changes.keys - ignored_fields).empty? + if (changes.keys - stripped_fields).empty? ids_to_delete << version.id else ids_to_strip << version.id From 2edfbad9f47383b000615dcddb2d1cd60aa218cb Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Wed, 4 Mar 2026 13:03:48 +0100 Subject: [PATCH 07/16] paper trail cleanup: merge old translations tasky --- lib/tasks/paper_trail.rake | 77 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index 61307733..8cf9d19f 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -18,6 +18,17 @@ PAPER_TRAIL_CLEAN_CONFIG = [ } ].freeze +PAPER_TRAIL_MERGE_TRANSLATIONS_CONFIG = [ + { + item_type: "Fmu", + translated_fields: %w[name] + }, + { + item_type: "Operator", + translated_fields: %w[name details] + } +].freeze + namespace :paper_trail do desc "Clean versions for all models - delete where only ignored fields changed, strip those fields from the rest. Set FOR_REAL=true to apply. Optionally filter with ITEM_TYPE=Foo." task clean_up: :environment do @@ -99,4 +110,70 @@ namespace :paper_trail do puts "Deleted." end end + + desc "Merge Translation versions (locale: en) into parent model versions, then delete all translation versions. Set FOR_REAL=true to apply. Optionally filter with ITEM_TYPE=Foo." + task merge_old_translations: :environment do + for_real = ENV["FOR_REAL"] == "true" + filter_item_type = ENV["ITEM_TYPE"] + + puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" + puts "Filtering to item_type=#{filter_item_type}" if filter_item_type + + configs = PAPER_TRAIL_MERGE_TRANSLATIONS_CONFIG + configs = configs.select { |c| c[:item_type] == filter_item_type } if filter_item_type + + configs.each do |config| + item_type = config[:item_type] + translated_fields = config[:translated_fields] + translation_item_type = "#{item_type}::Translation" + foreign_key = "#{item_type.downcase}_id" + + new_versions = [] + skipped = 0 + + translation_versions = PaperTrail::Version.where(item_type: translation_item_type, locale: "en", event: "update") + + translation_versions.find_each do |version| + parent_id = if version.object.present? + PaperTrail.serializer.load(version.object)[foreign_key] + elsif version.object_changes.present? + PaperTrail.serializer.load(version.object_changes)[foreign_key]&.last + end + + unless parent_id + skipped += 1 + next + end + + relevant_changes = PaperTrail.serializer.load(version.object_changes).slice(*translated_fields) + + next if relevant_changes.empty? + + new_versions << { + item_type: item_type, + item_id: parent_id, + event: version.event, + whodunnit: version.whodunnit, + object_changes: PaperTrail.serializer.dump(relevant_changes), + created_at: version.created_at + } + end + + total = PaperTrail::Version.where(item_type: translation_item_type).count + total_update_en = PaperTrail::Version.where(item_type: translation_item_type, locale: "en", event: "update").count + puts "\n#{item_type}:" + puts " #{translation_item_type} versions total: #{total}" + puts " #{translation_item_type} versions locale en and event update: #{total_update_en}" + puts " #{new_versions.size} to convert to #{item_type} versions" + puts " #{skipped} skipped due to missing #{foreign_key}" + + next unless for_real + + PaperTrail::Version.insert_all(new_versions) if new_versions.any? + puts " Created #{new_versions.size} #{item_type} versions." + + PaperTrail::Version.where(item_type: translation_item_type).delete_all + puts " Deleted #{total} #{translation_item_type} versions." + end + end end From 94df001267de28418bf31e1f71f225e223f5747a Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Wed, 4 Mar 2026 14:03:52 +0100 Subject: [PATCH 08/16] paper trail cleanup: another task that will remove not needed geojson changes from history --- lib/tasks/paper_trail.rake | 50 +++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index 8cf9d19f..39936418 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -14,7 +14,7 @@ PAPER_TRAIL_CLEAN_CONFIG = [ }, { item_type: "Fmu", - stripped_fields: %w[geometry updated_at] + stripped_fields: %w[geometry properties updated_at] } ].freeze @@ -176,4 +176,52 @@ namespace :paper_trail do puts " Deleted #{total} #{translation_item_type} versions." end end + + desc "Strip geojson['properties'] from Fmu version object_changes. Deletes version if geojson is the only change and becomes identical after stripping. Set FOR_REAL=true to apply." + task strip_fmu_geojson_properties: :environment do + for_real = ENV["FOR_REAL"] == "true" + + puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" + + ids_to_delete = [] + ids_to_update = {} + + PaperTrail::Version.where(item_type: "Fmu", event: "update").find_each do |version| + next if version.object_changes.blank? + + changes = PaperTrail.serializer.load(version.object_changes) + next unless changes.key?("geojson") + + old_geojson, new_geojson = changes["geojson"] + old_geojson = JSON.parse(old_geojson) if old_geojson.is_a?(String) + new_geojson = JSON.parse(new_geojson) if new_geojson.is_a?(String) + old_geojson&.delete("properties") + new_geojson&.delete("properties") + + if old_geojson == new_geojson + changes.delete("geojson") + else + changes["geojson"] = [old_geojson&.to_json, new_geojson&.to_json] + end + + if (changes.keys - %w[updated_at]).empty? + ids_to_delete << version.id + else + ids_to_update[version.id] = PaperTrail.serializer.dump(changes) + end + end + + puts "#{ids_to_delete.size} versions to delete (geojson-only change, properties was the only diff)" + puts "#{ids_to_update.size} versions to update (strip properties from geojson)" + + next unless for_real + + PaperTrail::Version.where(id: ids_to_delete).delete_all if ids_to_delete.any? + puts "Deleted #{ids_to_delete.size} versions." + + ids_to_update.each do |id, object_changes| + PaperTrail::Version.where(id: id).update_all(object_changes: object_changes) + end + puts "Updated #{ids_to_update.size} versions." + end end From fc1eac381f75c48d7d3defceff2d5a796be02f09 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Wed, 4 Mar 2026 14:17:49 +0100 Subject: [PATCH 09/16] add run all task --- lib/tasks/paper_trail.rake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index 39936418..42673a91 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -30,6 +30,9 @@ PAPER_TRAIL_MERGE_TRANSLATIONS_CONFIG = [ ].freeze namespace :paper_trail do + desc "Run all paper_trail cleanup tasks in order. Set FOR_REAL=true to apply." + task clean_up_all: %i[strip_fmu_geojson_properties clean_up deduplicate] + desc "Clean versions for all models - delete where only ignored fields changed, strip those fields from the rest. Set FOR_REAL=true to apply. Optionally filter with ITEM_TYPE=Foo." task clean_up: :environment do for_real = ENV["FOR_REAL"] == "true" From 2301bf5a81bd59d6546d295675c5da6c2344f154 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Wed, 4 Mar 2026 16:30:55 +0100 Subject: [PATCH 10/16] paper trail: cleanup - add squash create and follow up update events --- lib/tasks/paper_trail.rake | 62 +++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index 42673a91..df428f5d 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -31,7 +31,7 @@ PAPER_TRAIL_MERGE_TRANSLATIONS_CONFIG = [ namespace :paper_trail do desc "Run all paper_trail cleanup tasks in order. Set FOR_REAL=true to apply." - task clean_up_all: %i[strip_fmu_geojson_properties clean_up deduplicate] + task clean_up_all: %i[merge_old_translations strip_fmu_geojson_properties squash_create_updates clean_up deduplicate] desc "Clean versions for all models - delete where only ignored fields changed, strip those fields from the rest. Set FOR_REAL=true to apply. Optionally filter with ITEM_TYPE=Foo." task clean_up: :environment do @@ -180,6 +180,66 @@ namespace :paper_trail do end end + desc "Merge update versions into preceding create versions when done by the same user within 3 seconds, then delete the update. Set FOR_REAL=true to apply." + task squash_create_updates: :environment do + for_real = ENV["FOR_REAL"] == "true" + + puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" + + exclude_models = ["Operator::Translation", "Fmu::Translation", "Observer::Translation"] + + pairs = ActiveRecord::Base.connection.select_rows(<<~SQL) + SELECT c.id, u.id + FROM versions c + JOIN versions u + ON u.item_type = c.item_type + AND u.item_id = c.item_id + AND u.event = 'update' + AND (u.whodunnit = c.whodunnit OR (u.whodunnit IS NULL AND c.whodunnit IS NULL)) + AND EXTRACT(EPOCH FROM (u.created_at - c.created_at)) BETWEEN 0 AND 3 + WHERE c.event = 'create' AND c.item_type NOT IN (#{exclude_models.map { |m| "'#{m}'" }.join(", ")}) + ORDER BY c.id, u.created_at, u.id + SQL + + grouped = pairs.each_with_object({}) do |(create_id, update_id), h| + (h[create_id.to_i] ||= []) << update_id.to_i + end + + puts "Found #{grouped.size} create versions with #{pairs.size} follow-up updates to squash." + + next unless for_real + + update_ids_to_delete = [] + + grouped.each do |create_id, update_ids| + create_version = PaperTrail::Version.find(create_id) + next if create_version.object_changes.blank? + + # create_changes = PaperTrail.serializer.load(create_version.object_changes) + create_changes = create_version.changeset + + # Accumulate all update changes in order; later updates overwrite earlier for the same field + update_changes = PaperTrail::Version.where(id: update_ids).order(:created_at, :id).each_with_object({}) do |update_version, acc| + next if update_version.object_changes.blank? + + update_ids_to_delete << update_version.id + update_changes = update_version.changeset + acc.merge!(update_changes) + end + + # For fields in create: keep nil as origin, use update's final value + # For fields only in update: treat as [nil, new_val] since record started with nil + merged = create_changes.merge(update_changes.transform_values { |(_old, new_val)| [nil, new_val] }) do |_field, create_pair, (_nil, new_val)| + [create_pair[0], new_val] + end + + create_version.update_column(:object_changes, PaperTrail.serializer.dump(merged)) + end + + PaperTrail::Version.where(id: update_ids_to_delete).delete_all if update_ids_to_delete.any? + puts "Squashed #{update_ids_to_delete.size} update versions into their create versions." + end + desc "Strip geojson['properties'] from Fmu version object_changes. Deletes version if geojson is the only change and becomes identical after stripping. Set FOR_REAL=true to apply." task strip_fmu_geojson_properties: :environment do for_real = ENV["FOR_REAL"] == "true" From 4e9f4412c774fecb7ce9575b732de03d1fcf61d2 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Wed, 4 Mar 2026 18:00:11 +0100 Subject: [PATCH 11/16] small update to 3 min --- lib/tasks/paper_trail.rake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index df428f5d..4f4aa5b3 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -85,7 +85,7 @@ namespace :paper_trail do end end - desc "Remove duplicate versions with identical object_changes created within 3 seconds of each other. Set FOR_REAL=true to apply." + desc "Remove duplicate versions with identical object_changes created within 3 minutes of each other. Set FOR_REAL=true to apply." task deduplicate: :environment do for_real = ENV["FOR_REAL"] == "true" @@ -103,7 +103,7 @@ namespace :paper_trail do ) SELECT id FROM lagged WHERE object_changes = prev_object_changes - AND EXTRACT(EPOCH FROM (created_at - prev_created_at)) <= 3 + AND EXTRACT(EPOCH FROM (created_at - prev_created_at)) <= 3 * 60 SQL puts "Found #{ids_to_delete.size} duplicate versions." From 42e7faf266b0f94da0ac847cbea1b9de250ac38f Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Wed, 4 Mar 2026 18:14:38 +0100 Subject: [PATCH 12/16] paper trail: strip not only in updates --- lib/tasks/paper_trail.rake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index 4f4aa5b3..2ca41902 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -50,13 +50,13 @@ namespace :paper_trail do ids_to_delete = [] ids_to_strip = [] - PaperTrail::Version.where(event: "update", item_type: item_type).find_each do |version| + PaperTrail::Version.where(item_type: item_type).find_each do |version| next if version.object_changes.blank? changes = version.changeset next unless (changes.keys & stripped_fields).any? - if (changes.keys - stripped_fields).empty? + if (changes.keys - stripped_fields).empty? && version.event == "update" ids_to_delete << version.id else ids_to_strip << version.id @@ -249,7 +249,7 @@ namespace :paper_trail do ids_to_delete = [] ids_to_update = {} - PaperTrail::Version.where(item_type: "Fmu", event: "update").find_each do |version| + PaperTrail::Version.where(item_type: "Fmu").find_each do |version| next if version.object_changes.blank? changes = PaperTrail.serializer.load(version.object_changes) @@ -267,7 +267,7 @@ namespace :paper_trail do changes["geojson"] = [old_geojson&.to_json, new_geojson&.to_json] end - if (changes.keys - %w[updated_at]).empty? + if (changes.keys - %w[updated_at]).empty? && version.event == "update" ids_to_delete << version.id else ids_to_update[version.id] = PaperTrail.serializer.dump(changes) From 40a483d892a6e4caaa2e5883e86872c6116b3bf2 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Thu, 5 Mar 2026 10:18:20 +0100 Subject: [PATCH 13/16] remove merge all translations for now, will do it later --- lib/tasks/paper_trail.rake | 72 ++------------------------------------ 1 file changed, 3 insertions(+), 69 deletions(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index 2ca41902..f7560aa9 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -31,7 +31,7 @@ PAPER_TRAIL_MERGE_TRANSLATIONS_CONFIG = [ namespace :paper_trail do desc "Run all paper_trail cleanup tasks in order. Set FOR_REAL=true to apply." - task clean_up_all: %i[merge_old_translations strip_fmu_geojson_properties squash_create_updates clean_up deduplicate] + task clean_up_all: %i[strip_fmu_geojson_properties clean_up deduplicate squash_create_updates] desc "Clean versions for all models - delete where only ignored fields changed, strip those fields from the rest. Set FOR_REAL=true to apply. Optionally filter with ITEM_TYPE=Foo." task clean_up: :environment do @@ -114,73 +114,7 @@ namespace :paper_trail do end end - desc "Merge Translation versions (locale: en) into parent model versions, then delete all translation versions. Set FOR_REAL=true to apply. Optionally filter with ITEM_TYPE=Foo." - task merge_old_translations: :environment do - for_real = ENV["FOR_REAL"] == "true" - filter_item_type = ENV["ITEM_TYPE"] - - puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" - puts "Filtering to item_type=#{filter_item_type}" if filter_item_type - - configs = PAPER_TRAIL_MERGE_TRANSLATIONS_CONFIG - configs = configs.select { |c| c[:item_type] == filter_item_type } if filter_item_type - - configs.each do |config| - item_type = config[:item_type] - translated_fields = config[:translated_fields] - translation_item_type = "#{item_type}::Translation" - foreign_key = "#{item_type.downcase}_id" - - new_versions = [] - skipped = 0 - - translation_versions = PaperTrail::Version.where(item_type: translation_item_type, locale: "en", event: "update") - - translation_versions.find_each do |version| - parent_id = if version.object.present? - PaperTrail.serializer.load(version.object)[foreign_key] - elsif version.object_changes.present? - PaperTrail.serializer.load(version.object_changes)[foreign_key]&.last - end - - unless parent_id - skipped += 1 - next - end - - relevant_changes = PaperTrail.serializer.load(version.object_changes).slice(*translated_fields) - - next if relevant_changes.empty? - - new_versions << { - item_type: item_type, - item_id: parent_id, - event: version.event, - whodunnit: version.whodunnit, - object_changes: PaperTrail.serializer.dump(relevant_changes), - created_at: version.created_at - } - end - - total = PaperTrail::Version.where(item_type: translation_item_type).count - total_update_en = PaperTrail::Version.where(item_type: translation_item_type, locale: "en", event: "update").count - puts "\n#{item_type}:" - puts " #{translation_item_type} versions total: #{total}" - puts " #{translation_item_type} versions locale en and event update: #{total_update_en}" - puts " #{new_versions.size} to convert to #{item_type} versions" - puts " #{skipped} skipped due to missing #{foreign_key}" - - next unless for_real - - PaperTrail::Version.insert_all(new_versions) if new_versions.any? - puts " Created #{new_versions.size} #{item_type} versions." - - PaperTrail::Version.where(item_type: translation_item_type).delete_all - puts " Deleted #{total} #{translation_item_type} versions." - end - end - - desc "Merge update versions into preceding create versions when done by the same user within 3 seconds, then delete the update. Set FOR_REAL=true to apply." + desc "Merge update versions into preceding create versions when done by the same user within 10 seconds, then delete the update. Set FOR_REAL=true to apply." task squash_create_updates: :environment do for_real = ENV["FOR_REAL"] == "true" @@ -196,7 +130,7 @@ namespace :paper_trail do AND u.item_id = c.item_id AND u.event = 'update' AND (u.whodunnit = c.whodunnit OR (u.whodunnit IS NULL AND c.whodunnit IS NULL)) - AND EXTRACT(EPOCH FROM (u.created_at - c.created_at)) BETWEEN 0 AND 3 + AND EXTRACT(EPOCH FROM (u.created_at - c.created_at)) BETWEEN 0 AND 10 WHERE c.event = 'create' AND c.item_type NOT IN (#{exclude_models.map { |m| "'#{m}'" }.join(", ")}) ORDER BY c.id, u.created_at, u.id SQL From 9b791a1f2a81d179468cd90d04fdc34811e7fbb3 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Thu, 5 Mar 2026 18:35:37 +0100 Subject: [PATCH 14/16] add task to fix serialized uploaders in paper trail history --- lib/tasks/paper_trail.rake | 71 +++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/lib/tasks/paper_trail.rake b/lib/tasks/paper_trail.rake index f7560aa9..2f47c304 100644 --- a/lib/tasks/paper_trail.rake +++ b/lib/tasks/paper_trail.rake @@ -29,12 +29,78 @@ PAPER_TRAIL_MERGE_TRANSLATIONS_CONFIG = [ } ].freeze +# Known class renames between Rails versions stored in PaperTrail YAML +YAML_CLASS_SUBSTITUTIONS = [ + ["ActiveRecord::Attribute::", "ActiveModel::Attribute::"], + ["ActiveModel::Type::Text", "ActiveModel::Type::String"], + ["OperatorDocumentUploader", "DocumentFileUploader"], + [/LogoUploader::Uploader\d+/, "LogoUploader"] +].freeze + namespace :paper_trail do desc "Run all paper_trail cleanup tasks in order. Set FOR_REAL=true to apply." - task clean_up_all: %i[strip_fmu_geojson_properties clean_up deduplicate squash_create_updates] + task clean_up_all: %i[fix_yaml_serialization strip_fmu_geojson_properties clean_up deduplicate squash_create_updates] + + desc "Fix YAML serialization issues in PaperTrail versions caused by Rails class renames. also removes uploader objects. Set FOR_REAL=true to apply." + task fix_yaml_serialization: :environment do + puts "Fixing YAML serialization issues in PaperTrail versions caused by Rails class renames, and removing uploader objects..." + for_real = ENV["FOR_REAL"] == "true" + puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" + + scope = PaperTrail::Version.where( + "object LIKE '%!ruby/object:%' OR object_changes LIKE '%!ruby/object:%'" + ) + puts "Found #{scope.count} versions with serialized Ruby objects.\n\n" + + normalize_uploaders = ->(obj, key) do + obj.class.name.to_s.end_with?("Uploader") ? obj.model.read_attribute(key) : obj + end + + fixed = 0 + failed = 0 + + scope.find_each do |version| + updates = {} + + %i[object object_changes].each do |col| + raw = version.read_attribute(col) + next if raw.blank? + + substituted = raw.dup + YAML_CLASS_SUBSTITUTIONS.each { |old, new_name| substituted.gsub!(old, new_name) } + next if substituted == raw + + begin + loaded = PaperTrail.serializer.load(substituted) + normalized = loaded.map do |k, v| + if col == :object + [k, normalize_uploaders.call(v, k)] + elsif col == :object_changes && v.is_a?(Array) + [k, v.map { |change| normalize_uploaders.call(change, k) }] + else + raise "Unexpected value type in #{col}: #{v.class}" + end + end.to_h + updates[col] = PaperTrail.serializer.dump(normalized) + rescue => e + puts " [FAIL] Version #{version.id} #{col}: #{e.message}" + failed += 1 + end + end + + next if updates.empty? + + version.update_columns(updates) if for_real + fixed += 1 + end + + puts "\nVersions fixed: #{fixed}" + puts "Versions with errors: #{failed}" + end desc "Clean versions for all models - delete where only ignored fields changed, strip those fields from the rest. Set FOR_REAL=true to apply. Optionally filter with ITEM_TYPE=Foo." task clean_up: :environment do + puts "Cleaning up PaperTrail versions by removing ignored fields and deleting versions with only ignored fields changed..." for_real = ENV["FOR_REAL"] == "true" filter_item_type = ENV["ITEM_TYPE"] @@ -87,6 +153,7 @@ namespace :paper_trail do desc "Remove duplicate versions with identical object_changes created within 3 minutes of each other. Set FOR_REAL=true to apply." task deduplicate: :environment do + puts "Removing duplicate PaperTrail versions with identical object_changes created within 3 minutes of each other..." for_real = ENV["FOR_REAL"] == "true" puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" @@ -116,6 +183,7 @@ namespace :paper_trail do desc "Merge update versions into preceding create versions when done by the same user within 10 seconds, then delete the update. Set FOR_REAL=true to apply." task squash_create_updates: :environment do + puts "Squashing PaperTrail update versions into their create version when they have the same whodunnit and are created within 10 seconds of each other..." for_real = ENV["FOR_REAL"] == "true" puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" @@ -176,6 +244,7 @@ namespace :paper_trail do desc "Strip geojson['properties'] from Fmu version object_changes. Deletes version if geojson is the only change and becomes identical after stripping. Set FOR_REAL=true to apply." task strip_fmu_geojson_properties: :environment do + puts "Stripping geojson['properties'] from Fmu version object_changes, and deleting versions where geojson is the only change and becomes identical after stripping..." for_real = ENV["FOR_REAL"] == "true" puts for_real ? "RUNNING FOR REAL" : "DRY RUN (set FOR_REAL=true to apply changes)" From 4f83ee4c1acaa8950929928d2cc22cbbfcd43a53 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Fri, 6 Mar 2026 12:26:37 +0100 Subject: [PATCH 15/16] fix importer spec - as geojson properties are updated after_save those won't be in results immediately --- .../importers/geojson_fmus_importer.json | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/spec/fixtures/snapshots/importers/geojson_fmus_importer.json b/spec/fixtures/snapshots/importers/geojson_fmus_importer.json index e435cad5..fed5dea1 100644 --- a/spec/fixtures/snapshots/importers/geojson_fmus_importer.json +++ b/spec/fixtures/snapshots/importers/geojson_fmus_importer.json @@ -9,7 +9,7 @@ "id": 100187, "num_ccf": null, "end_date": "2021-12-04", - "fmu_name": "asdf", + "fmu_name": "0901421", "fmu_type": 4110510, "globalid": "{87DC02A0-EBEA-4512-9512-8269FFA3F3E2}", "iso3_fmu": "CMR", @@ -39,19 +39,7 @@ "certific_5": null, "certific_6": null, "certific_7": 0, - "certific_8": null, - "operator_id": 10624, - "observations": 0, - "fmu_type_label": "communal", - "forest_type": "cf", - "certification_ls": false, - "certification_fsc": false, - "certification_olb": false, - "certification_tlv": false, - "certification_pafc": false, - "certification_pefc": false, - "certification_pbn": false, - "certification_fsc_cw": false + "certific_8": null }, "geometry": { "type": "Polygon", @@ -141,4 +129,4 @@ ] } } -} +} \ No newline at end of file From 10887af42b6a8a7b55db1caf8f3133e95718a994 Mon Sep 17 00:00:00 2001 From: Tomasz Subik Date: Fri, 6 Mar 2026 13:01:40 +0100 Subject: [PATCH 16/16] update geometry on create and on update separately --- app/models/fmu.rb | 5 +++-- spec/models/fmu_spec.rb | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/models/fmu.rb b/app/models/fmu.rb index 384a4148..2a4897e5 100644 --- a/app/models/fmu.rb +++ b/app/models/fmu.rb @@ -50,7 +50,8 @@ class Fmu < ApplicationRecord validates :forest_type, presence: true validates :geojson, geojson: true, if: :geojson_changed? - after_save :update_geometry, if: :saved_change_to_geojson? + after_create :update_geometry, if: :geojson + after_update :update_geometry, if: :saved_change_to_geojson? after_save :update_geojson_properties # TODO Redo all of those @@ -148,6 +149,6 @@ def update_geometry end def update_centroid - self.class.unscoped.where(id: id).update_all("geojson = jsonb_set(geojson, '{properties,centroid}', ST_AsGeoJSON(st_centroid(geometry))::jsonb, true)") + self.class.unscoped.where(id: id).update_all("geojson = jsonb_set(jsonb_set(geojson, '{properties}', COALESCE(geojson -> 'properties', '{}'), true), '{properties,centroid}', ST_AsGeoJSON(st_centroid(geometry))::jsonb, true)") end end diff --git a/spec/models/fmu_spec.rb b/spec/models/fmu_spec.rb index d58c003e..6a59ffab 100644 --- a/spec/models/fmu_spec.rb +++ b/spec/models/fmu_spec.rb @@ -127,9 +127,8 @@ country = create(:country) operator = create(:operator, country: country, fa_id: "fa_id") fmu = create(:fmu_geojson, operator: operator, country: country) - fmu.save - fmu.reload + expect(fmu.geojson["geometry"]["coordinates"]).to eql(fmu.geometry.coordinates) end end