From 3916f761f33dc9812c8ece8bcbead28041f758dc Mon Sep 17 00:00:00 2001 From: Alex Nizamov Date: Tue, 7 Oct 2025 03:26:28 +0500 Subject: [PATCH 1/2] (#917) Upload envelope graphs to S3 during publishing --- .env | 2 + app/api/v1/publish.rb | 1 + app/models/envelope.rb | 10 +++ app/services/sync_envelope_graph_with_s3.rb | 57 +++++++++++++ .../20251022205617_add_s3_url_to_envelopes.rb | 6 ++ db/structure.sql | 11 ++- spec/factories/envelopes.rb | 2 +- spec/factories/resources.rb | 22 +----- .../sync_envelope_graph_with_s3_spec.rb | 79 +++++++++++++++++++ 9 files changed, 169 insertions(+), 21 deletions(-) create mode 100644 app/services/sync_envelope_graph_with_s3.rb create mode 100644 db/migrate/20251022205617_add_s3_url_to_envelopes.rb create mode 100644 spec/services/sync_envelope_graph_with_s3_spec.rb diff --git a/.env b/.env index cbd4d820..4d50aaa2 100644 --- a/.env +++ b/.env @@ -2,6 +2,8 @@ AWS_REGION=us-east-2 ENVELOPE_DOWNLOADS_BUCKET=envelope-downloads +ENVELOPE_GRAPHS_BUCKET= + POSTGRESQL_ADDRESS=localhost POSTGRESQL_USERNAME=metadataregistry POSTGRESQL_PASSWORD=metadataregistry diff --git a/app/api/v1/publish.rb b/app/api/v1/publish.rb index 588b69bd..0e4ae79b 100644 --- a/app/api/v1/publish.rb +++ b/app/api/v1/publish.rb @@ -1,5 +1,6 @@ require 'policies/envelope_policy' require 'services/publish_interactor' +require 'services/sync_envelope_graph_with_s3' module API module V1 diff --git a/app/models/envelope.rb b/app/models/envelope.rb index 1e425907..43770bf1 100644 --- a/app/models/envelope.rb +++ b/app/models/envelope.rb @@ -46,8 +46,10 @@ class Envelope < ActiveRecord::Base before_validation :process_resource, :process_headers before_save :assign_last_verified_on after_save :update_headers + after_save :upload_to_s3 before_destroy :delete_description_sets, prepend: true after_destroy :delete_from_ocn + after_destroy :delete_from_s3 after_commit :export_to_ocn validates :envelope_community, :envelope_type, :envelope_version, @@ -260,4 +262,12 @@ def export_to_ocn ExportToOCNJob.perform_later(id) end + + def upload_to_s3 + SyncEnvelopeGraphWithS3.upload(self) + end + + def delete_from_s3 + SyncEnvelopeGraphWithS3.remove(self) + end end diff --git a/app/services/sync_envelope_graph_with_s3.rb b/app/services/sync_envelope_graph_with_s3.rb new file mode 100644 index 00000000..6a9b39e1 --- /dev/null +++ b/app/services/sync_envelope_graph_with_s3.rb @@ -0,0 +1,57 @@ +# Uploads or deletes an envelope graph from the S3 bucket +class SyncEnvelopeGraphWithS3 + attr_reader :envelope + + delegate :envelope_community, :envelope_ceterms_ctid, to: :envelope + + def initialize(envelope) + @envelope = envelope + end + + class << self + def upload(envelope) + new(envelope).upload + end + + def remove(envelope) + new(envelope).remove + end + end + + def upload + return unless s3_bucket_name + + s3_object.put( + body: envelope.processed_resource.to_json, + content_type: 'application/json' + ) + + envelope.update_column(:s3_url, s3_object.public_url) + end + + def remove + return unless s3_bucket_name + + s3_object.delete + end + + def s3_bucket + @s3_bucket ||= s3_resource.bucket(s3_bucket_name) + end + + def s3_bucket_name + ENV['ENVELOPE_GRAPHS_BUCKET'].presence + end + + def s3_key + "#{envelope_community.name}/#{envelope_ceterms_ctid}.json" + end + + def s3_object + @s3_object ||= s3_bucket.object(s3_key) + end + + def s3_resource + @s3_resource ||= Aws::S3::Resource.new(region: ENV['AWS_REGION'].presence) + end +end diff --git a/db/migrate/20251022205617_add_s3_url_to_envelopes.rb b/db/migrate/20251022205617_add_s3_url_to_envelopes.rb new file mode 100644 index 00000000..547bedad --- /dev/null +++ b/db/migrate/20251022205617_add_s3_url_to_envelopes.rb @@ -0,0 +1,6 @@ +class AddS3UrlToEnvelopes < ActiveRecord::Migration[8.0] + def change + add_column :envelopes, :s3_url, :string + add_index :envelopes, :s3_url, unique: true + end +end diff --git a/db/structure.sql b/db/structure.sql index 13067ab7..dc537b86 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -432,7 +432,8 @@ CREATE TABLE public.envelopes ( publishing_organization_id uuid, resource_publish_type character varying, last_verified_on date, - publication_status integer DEFAULT 0 NOT NULL + publication_status integer DEFAULT 0 NOT NULL, + s3_url character varying ); @@ -1480,6 +1481,13 @@ CREATE INDEX index_envelopes_on_purged_at ON public.envelopes USING btree (purge CREATE INDEX index_envelopes_on_resource_type ON public.envelopes USING btree (resource_type); +-- +-- Name: index_envelopes_on_s3_url; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX index_envelopes_on_s3_url ON public.envelopes USING btree (s3_url); + + -- -- Name: index_envelopes_on_top_level_object_ids; Type: INDEX; Schema: public; Owner: - -- @@ -1889,6 +1897,7 @@ ALTER TABLE ONLY public.envelopes SET search_path TO "$user", public; INSERT INTO "schema_migrations" (version) VALUES +('20251022205617'), ('20250925025616'), ('20250922224518'), ('20250921174021'), diff --git a/spec/factories/envelopes.rb b/spec/factories/envelopes.rb index 9c5ca056..2a09606e 100644 --- a/spec/factories/envelopes.rb +++ b/spec/factories/envelopes.rb @@ -1,6 +1,6 @@ FactoryBot.define do factory :envelope do - envelope_ceterms_ctid { Envelope.generate_ctid } + envelope_ceterms_ctid { processed_resource[:'ceterms:ctid'] || Envelope.generate_ctid } envelope_ctdl_type { 'ceterms:CredentialOrganization' } envelope_type { :resource_data } envelope_version { '0.52.0' } diff --git a/spec/factories/resources.rb b/spec/factories/resources.rb index fcf6b309..34d96a0e 100644 --- a/spec/factories/resources.rb +++ b/spec/factories/resources.rb @@ -1,12 +1,15 @@ FactoryBot.define do factory :base_resource, class: 'Hashie::Mash' do transient do + ctid { Envelope.generate_ctid } provisional { false } end add_attribute(:'adms:status') do 'graphPublicationStatus:Provisional' if provisional end + + add_attribute(:'ceterms:ctid') { ctid } end factory :resource, parent: :base_resource do @@ -19,11 +22,9 @@ factory :cer_org, parent: :base_resource do add_attribute(:@type) { 'ceterms:CredentialOrganization' } add_attribute(:@context) { 'http://credreg.net/ctdl/schema/context/json' } - transient { ctid { Envelope.generate_ctid } } add_attribute(:@id) do "http://credentialengineregistry.org/resources/#{ctid}" end - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceterms:name') { 'Test Org' } add_attribute(:'ceterms:description') { 'Org Description' } add_attribute(:'ceterms:subjectWebpage') { 'http://example.com/test-org' } @@ -51,8 +52,6 @@ end add_attribute(:@type) { 'ceterms:Certificate' } add_attribute(:@context) { 'http://credreg.net/ctdl/schema/context/json' } - transient { ctid { Envelope.generate_ctid } } - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceterms:name') { 'Test Cred' } add_attribute(:'ceterms:description') { 'Test Cred Description' } add_attribute(:'ceterms:subjectWebpage') { 'http://example.com/test-cred' } @@ -69,22 +68,18 @@ factory :cer_ass_prof, parent: :base_resource do add_attribute(:@type) { 'ceterms:AssessmentProfile' } add_attribute(:@context) { 'http://credreg.net/ctdl/schema/context/json' } - transient { ctid { Envelope.generate_ctid } } add_attribute(:@id) do "http://credentialengineregistry.org/resources/#{ctid}" end - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceterms:name') { 'Test Assessment Profile' } end factory :cer_cond_man, parent: :base_resource do add_attribute(:@type) { 'ceterms:ConditionManifest' } add_attribute(:@context) { 'http://credreg.net/ctdl/schema/context/json' } - transient { ctid { Envelope.generate_ctid } } add_attribute(:@id) do "http://credentialengineregistry.org/resources/#{ctid}" end - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceterms:name') { 'Test Cond Man' } add_attribute(:'ceterms:conditionManifestOf') { [{ '@id' => 'AgentID' }] } end @@ -92,11 +87,9 @@ factory :cer_cost_man, parent: :base_resource do add_attribute(:@type) { 'ceterms:CostManifest' } add_attribute(:@context) { 'http://credreg.net/ctdl/schema/context/json' } - transient { ctid { Envelope.generate_ctid } } add_attribute(:@id) do "http://credentialengineregistry.org/resources/#{ctid}" end - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceterms:name') { 'Test Cost Man' } add_attribute(:'ceterms:costDetails') { 'CostDetails' } add_attribute(:'ceterms:costManifestOf') { [{ '@id' => 'AgentID' }] } @@ -105,11 +98,9 @@ factory :cer_lrn_opp_prof, parent: :base_resource do add_attribute(:@type) { 'ceterms:CostManifest' } add_attribute(:@context) { 'http://credreg.net/ctdl/schema/context/json' } - transient { ctid { Envelope.generate_ctid } } add_attribute(:@id) do "http://credentialengineregistry.org/resources/#{ctid}" end - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceterms:name') { 'Test Lrn Opp Prof' } add_attribute(:'ceterms:costDetails') { 'CostDetails' } add_attribute(:'ceterms:costManifestOf') { [{ '@id' => 'AgentID' }] } @@ -141,7 +132,6 @@ add_attribute(:@id) { ctid } add_attribute(:@type) { 'ceterms:AssessmentProfile' } add_attribute(:@context) { 'http://credreg.net/ctdl/schema/context/json' } - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceterms:name') { 'Test Assessment Profile' } add_attribute(:'ceasn:isPartOf') { part_of } end @@ -149,29 +139,24 @@ factory :cer_competency, parent: :base_resource do transient { part_of { nil } } transient { competency_text { 'This is the competency text...' } } - transient { ctid { Envelope.generate_ctid } } id { "http://credentialengineregistry.org/resources/#{ctid}" } add_attribute(:@id) { id } add_attribute(:@type) { 'ceasn:Competency' } - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceasn:isPartOf') { part_of } add_attribute(:'ceasn:inLanguage') { ['en'] } add_attribute(:'ceasn:competencyText') { { 'en-us' => competency_text } } end factory :cer_competency_framework, parent: :base_resource do - transient { ctid { Envelope.generate_ctid } } id { "http://credentialengineregistry.org/resources/#{ctid}" } add_attribute(:@id) { id } add_attribute(:@type) { 'ceasn:CompetencyFramework' } - add_attribute(:'ceterms:ctid') { ctid } add_attribute(:'ceasn:inLanguage') { ['en'] } add_attribute(:'ceasn:name') { { 'en-us' => 'Competency Framework name' } } add_attribute(:'ceasn:description') { { 'en-us' => 'Competency Framework description' } } end factory :cer_graph_competency_framework, parent: :base_resource do - transient { ctid { Envelope.generate_ctid } } id { "http://credentialengineregistry.org/resources/#{ctid}" } add_attribute(:@id) { id } add_attribute(:@type) { 'ceasn:CompetencyFramework' } @@ -186,6 +171,5 @@ attributes_for(:cer_competency_framework, ctid: ctid) ] end - add_attribute(:'ceterms:ctid') { ctid } end end diff --git a/spec/services/sync_envelope_graph_with_s3_spec.rb b/spec/services/sync_envelope_graph_with_s3_spec.rb new file mode 100644 index 00000000..2ec8f604 --- /dev/null +++ b/spec/services/sync_envelope_graph_with_s3_spec.rb @@ -0,0 +1,79 @@ +RSpec.describe SyncEnvelopeGraphWithS3 do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:envelope) { build(:envelope, :from_cer) } + let(:s3_bucket) { double('s3_bucket') } # rubocop:todo RSpec/VerifiedDoubles + let(:s3_bucket_name) { Faker::Lorem.word } + let(:s3_object) { double('s3_object') } # rubocop:todo RSpec/VerifiedDoubles + let(:s3_region) { 'aws-s3_region-test' } + let(:s3_resource) { double('s3_resource') } # rubocop:todo RSpec/VerifiedDoubles + let(:s3_url) { Faker::Internet.url } + + context 'without bucket' do # rubocop:todo RSpec/MultipleMemoizedHelpers + describe '.upload' do # rubocop:todo RSpec/MultipleMemoizedHelpers + it 'does nothing' do + expect { described_class.upload(envelope) }.not_to raise_error + end + end + + describe '.remove' do # rubocop:todo RSpec/MultipleMemoizedHelpers + it 'does nothing' do + expect { described_class.remove(envelope) }.not_to raise_error + end + end + end + + context 'with bucket' do # rubocop:todo RSpec/MultipleMemoizedHelpers + before do + ENV['AWS_REGION'] = s3_region + ENV['ENVELOPE_GRAPHS_BUCKET'] = s3_bucket_name + + # rubocop:todo RSpec/MessageSpies + expect(Aws::S3::Resource).to receive(:new) # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies + # rubocop:enable RSpec/MessageSpies + .with(region: s3_region) + .and_return(s3_resource) + .at_least(:once) + + # rubocop:todo RSpec/MessageSpies + expect(s3_resource).to receive(:bucket) # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies + # rubocop:enable RSpec/MessageSpies + .with(s3_bucket_name) + .and_return(s3_bucket) + .at_least(:once) + + # rubocop:todo RSpec/MessageSpies + expect(s3_bucket).to receive(:object) # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies + # rubocop:enable RSpec/MessageSpies + .with("ce_registry/#{envelope.envelope_ceterms_ctid}.json") + .and_return(s3_object) + .at_least(:once) + + # rubocop:todo RSpec/MessageSpies + expect(s3_object).to receive(:put).with( # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies + # rubocop:enable RSpec/MessageSpies + body: envelope.processed_resource.to_json, + content_type: 'application/json' + ) + + # rubocop:todo RSpec/StubbedMock + # rubocop:todo RSpec/MessageSpies + expect(s3_object).to receive(:public_url).and_return(s3_url) # rubocop:todo RSpec/ExpectInHook, RSpec/MessageSpies, RSpec/StubbedMock + # rubocop:enable RSpec/MessageSpies + # rubocop:enable RSpec/StubbedMock + end + + describe '.upload' do # rubocop:todo RSpec/MultipleMemoizedHelpers + it 'uploads the s3_resource to S3' do + envelope.save! + expect(envelope.s3_url).to eq(s3_url) + end + end + + describe '.remove' do # rubocop:todo RSpec/MultipleMemoizedHelpers + it 'uploads the s3_resource to S3' do + expect(s3_object).to receive(:delete) # rubocop:todo RSpec/MessageSpies + envelope.save! + expect { envelope.destroy }.not_to raise_error + end + end + end +end From ba54f43035c9990dd912c945e0dfcee62dfcb78e Mon Sep 17 00:00:00 2001 From: Ariel Rolfo <96243774+arielr-lt@users.noreply.github.com> Date: Thu, 30 Oct 2025 18:23:17 -0300 Subject: [PATCH 2/2] Modify CI workflow for bundler and coverage handling Updated GitHub Actions workflow to disable bundler cache, adjust coverage report upload condition, and remove Semgrep steps. --- .github/workflows/test.yaml | 47 ++++++++++--------------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 592d9c05..b0609ff7 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,3 +1,4 @@ + name: Run linter and tests on: @@ -41,6 +42,8 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + submodules: recursive + - name: Pre-cache grape-middleware-logger gem run: | @@ -51,7 +54,13 @@ jobs: - uses: ruby/setup-ruby@v1 with: - bundler-cache: true + bundler-cache: false + ruby-version: '3.4' + - name: Install gems (non-frozen) + run: | + bundle config set path vendor/bundle + bundle config set frozen false + bundle install --jobs 4 - run: RACK_ENV=test bundle exec rake db:migrate # Rubocop, bundler-audit, etc. are executed through Overcommit hooks. @@ -70,40 +79,8 @@ jobs: SONAR_HOST_URL: ${{ vars.SONAR_HOST_URL }} - name: Upload coverage report - if: always() + if: ${{ always() && hashFiles('coverage/**') != '' }} uses: actions/upload-artifact@v4 with: name: coverage-report - path: coverage - - semgrep: - name: "Semgrep SAST" - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - - name: Install Semgrep - run: | - python3 -m pip install --upgrade pip - python3 -m pip install semgrep - - name: Run Semgrep (Ruby/JS) - run: | - semgrep --config p/r2c-security-audit \ - --include app --include lib \ - --error --timeout 180 - - name: Export Semgrep SARIF - if: always() - run: | - semgrep --config p/r2c-security-audit \ - --include app --include lib \ - --sarif -o semgrep.sarif || true - - name: Upload Semgrep SARIF - if: always() - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: semgrep.sarif + path: coverage/