From 23c5c0aa5e8ae387dd7479affd07cd966f96c256 Mon Sep 17 00:00:00 2001 From: Markos Volikas Date: Sun, 9 Nov 2025 17:00:46 +0200 Subject: [PATCH 1/8] migrate to model version 4.1.0, remove explicit submodules, use parent inference and subprojects discovery --- archetype/pom.xml | 11 +++---- core/pom.xml | 12 +++----- external/ai/pom.xml | 11 ++----- external/aws/pom.xml | 13 +++------ external/langid/pom.xml | 13 +++------ external/opensearch/archetype/pom.xml | 41 +++++++++++++-------------- external/opensearch/pom.xml | 17 ++++++----- external/playwright/pom.xml | 11 ++----- external/pom.xml | 14 ++------- external/selenium/pom.xml | 12 ++------ external/solr/archetype/pom.xml | 9 ++---- external/solr/pom.xml | 15 +++++----- external/sql/pom.xml | 13 +++------ external/tika/pom.xml | 11 ++----- external/urlfrontier/pom.xml | 11 ++----- external/warc/pom.xml | 12 ++------ pom.xml | 24 ++-------------- 17 files changed, 80 insertions(+), 170 deletions(-) diff --git a/archetype/pom.xml b/archetype/pom.xml index 1e9edcbdc..de83e5045 100644 --- a/archetype/pom.xml +++ b/archetype/pom.xml @@ -19,13 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - + + + + stormcrawler-archetype stormcrawler-archetype diff --git a/core/pom.xml b/core/pom.xml index 4bc9c0bed..b5a769cc0 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -19,14 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - + + + + stormcrawler-core jar diff --git a/external/ai/pom.xml b/external/ai/pom.xml index 34cfa79ff..a34c9fff9 100644 --- a/external/ai/pom.xml +++ b/external/ai/pom.xml @@ -19,14 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-ai diff --git a/external/aws/pom.xml b/external/aws/pom.xml index 4fb724ea2..ed09cc50d 100644 --- a/external/aws/pom.xml +++ b/external/aws/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + + stormcrawler-aws jar diff --git a/external/langid/pom.xml b/external/langid/pom.xml index ab8398d02..1cf87fb8e 100644 --- a/external/langid/pom.xml +++ b/external/langid/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + + stormcrawler-langid jar diff --git a/external/opensearch/archetype/pom.xml b/external/opensearch/archetype/pom.xml index 237d9a2d1..aafdc9b0c 100644 --- a/external/opensearch/archetype/pom.xml +++ b/external/opensearch/archetype/pom.xml @@ -19,14 +19,11 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - ../../../pom.xml + ../../.. stormcrawler-opensearch-archetype @@ -35,22 +32,22 @@ under the License. - - - src/main/resources - true - - META-INF/maven/archetype-metadata.xml - - - - src/main/resources - false - - META-INF/maven/archetype-metadata.xml - - - + + + src/main/resources + true + + META-INF/maven/archetype-metadata.xml + + + + src/main/resources + false + + META-INF/maven/archetype-metadata.xml + + + diff --git a/external/opensearch/pom.xml b/external/opensearch/pom.xml index 888f874bb..9c99b562e 100644 --- a/external/opensearch/pom.xml +++ b/external/opensearch/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + 2.19.3 @@ -110,6 +105,10 @@ under the License. slf4j-simple test - + + + archetype + + diff --git a/external/playwright/pom.xml b/external/playwright/pom.xml index 2df4fbaf8..a69170941 100644 --- a/external/playwright/pom.xml +++ b/external/playwright/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + stormcrawler-playwright jar diff --git a/external/pom.xml b/external/pom.xml index 6324f5c9e..561fc3df3 100644 --- a/external/pom.xml +++ b/external/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-external pom @@ -62,7 +56,5 @@ under the License. ${mockito.version} test - - diff --git a/external/selenium/pom.xml b/external/selenium/pom.xml index ef7f37c57..a9559948b 100644 --- a/external/selenium/pom.xml +++ b/external/selenium/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + stormcrawler-selenium jar @@ -102,7 +97,6 @@ under the License. junit-jupiter test - diff --git a/external/solr/archetype/pom.xml b/external/solr/archetype/pom.xml index 5bd2f29da..5d855a41f 100644 --- a/external/solr/archetype/pom.xml +++ b/external/solr/archetype/pom.xml @@ -19,14 +19,11 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - ../../../pom.xml + ../../.. stormcrawler-solr-archetype diff --git a/external/solr/pom.xml b/external/solr/pom.xml index 3444a90e7..1271103b7 100644 --- a/external/solr/pom.xml +++ b/external/solr/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + stormcrawler-solr jar @@ -69,4 +64,8 @@ under the License. + + archetype + + diff --git a/external/sql/pom.xml b/external/sql/pom.xml index 0eece4e64..630c8aa36 100644 --- a/external/sql/pom.xml +++ b/external/sql/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + + stormcrawler-sql jar diff --git a/external/tika/pom.xml b/external/tika/pom.xml index b956dc812..14f74c030 100644 --- a/external/tika/pom.xml +++ b/external/tika/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + stormcrawler-tika jar diff --git a/external/urlfrontier/pom.xml b/external/urlfrontier/pom.xml index e4d4d5c5b..6a18aa26d 100644 --- a/external/urlfrontier/pom.xml +++ b/external/urlfrontier/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + stormcrawler-urlfrontier jar diff --git a/external/warc/pom.xml b/external/warc/pom.xml index 748333fcd..858dfc941 100644 --- a/external/warc/pom.xml +++ b/external/warc/pom.xml @@ -19,15 +19,10 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + stormcrawler-warc jar @@ -96,6 +91,5 @@ under the License. test-jar test - diff --git a/pom.xml b/pom.xml index 50b69054b..4ee6b85bd 100644 --- a/pom.xml +++ b/pom.xml @@ -19,8 +19,8 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + org.apache @@ -671,27 +671,7 @@ under the License. commons-codec ${commons.codec.version} - - - core - external - external/ai - external/aws - external/langid - external/opensearch - external/playwright - external/selenium - external/solr - external/sql - external/tika - external/urlfrontier - external/warc - archetype - external/opensearch/archetype - external/solr/archetype - - From e5516d3047dcfb9fc55171ea6e074d7dc54dea2d Mon Sep 17 00:00:00 2001 From: Markos Volikas Date: Sat, 22 Nov 2025 16:40:13 +0200 Subject: [PATCH 2/8] #1662 - update action to test with maven 4 --- .github/workflows/maven.yml | 43 ++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 2dcfa4ab1..1cb79fcd9 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -28,17 +28,14 @@ jobs: rat: runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + - name: Setup Maven environment (RAT) + uses: s4u/setup-maven-action@v1.19.0 with: - path: ~/.m2/repository - key: rat-maven-${{ hashFiles('**/pom.xml') }} - - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0 - with: - distribution: adopt java-version: 17 - - name: Build with Maven + java-distribution: adopt + maven-version: 3.9.11 + + - name: Build with Maven (RAT profile) run: mvn -B --no-transfer-progress -Prat -DskipTests verify -Dskip.format.code=false build: @@ -48,20 +45,18 @@ jobs: strategy: matrix: os: [ubuntu-latest] - java: [ 17 ] + java: [17] + # Test both Maven 3 and Maven 4 + maven: ['3.9.11', '4.0.0-rc-5'] experimental: [false] + steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven- - - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0 - with: - distribution: adopt - java-version: ${{ matrix.java }} - - name: Build with Maven - run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify + - name: Setup Maven environment (build) + uses: s4u/setup-maven-action@v1.19.0 + with: + java-version: ${{ matrix.java }} + java-distribution: adopt + maven-version: ${{ matrix.maven }} + + - name: Build with Maven + run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify From 1797916ed8dd9bd92cc99a90927fa88280c42a51 Mon Sep 17 00:00:00 2001 From: Markos Volikas Date: Sat, 22 Nov 2025 17:13:37 +0200 Subject: [PATCH 3/8] #1662 - test with maven-gh-actions-shared --- .github/workflows/maven.yml | 43 ++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 1cb79fcd9..50269f12e 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -28,35 +28,30 @@ jobs: rat: runs-on: ubuntu-latest steps: - - name: Setup Maven environment (RAT) - uses: s4u/setup-maven-action@v1.19.0 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: + path: ~/.m2/repository + key: rat-maven-${{ hashFiles('**/pom.xml') }} + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0 + with: + distribution: adopt java-version: 17 - java-distribution: adopt - maven-version: 3.9.11 - - - name: Build with Maven (RAT profile) + - name: Build with Maven run: mvn -B --no-transfer-progress -Prat -DskipTests verify -Dskip.format.code=false build: needs: rat - runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.experimental }} - strategy: - matrix: - os: [ubuntu-latest] - java: [17] - # Test both Maven 3 and Maven 4 - maven: ['3.9.11', '4.0.0-rc-5'] - experimental: [false] + uses: apache/maven-gh-actions-shared/.github/workflows/maven-verify.yml@v4 - steps: - - name: Setup Maven environment (build) - uses: s4u/setup-maven-action@v1.19.0 - with: - java-version: ${{ matrix.java }} - java-distribution: adopt - maven-version: ${{ matrix.maven }} + with: + os-matrix: '[ "ubuntu-latest" ]' + jdk-matrix: '[ "17" ]' + jdk-distribution-matrix: '[ "temurin" ]' - - name: Build with Maven - run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify + maven4-enabled: true + + ff-goal: 'verify' + verify-goal: 'verify' + ff-site-run: false From 189041f0eab6042288624ccd73cedc0d7952a0d0 Mon Sep 17 00:00:00 2001 From: Markos Volikas Date: Sat, 22 Nov 2025 17:20:58 +0200 Subject: [PATCH 4/8] #1662 - separate workflow for maven 4 --- .github/workflows/maven.yml | 36 +++++++++++++++---------- .github/workflows/verify-maven-4.yml | 39 ++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/verify-maven-4.yml diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 50269f12e..288a66786 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -28,7 +28,7 @@ jobs: rat: runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ~/.m2/repository @@ -43,15 +43,25 @@ jobs: build: needs: rat - uses: apache/maven-gh-actions-shared/.github/workflows/maven-verify.yml@v4 - - with: - os-matrix: '[ "ubuntu-latest" ]' - jdk-matrix: '[ "17" ]' - jdk-distribution-matrix: '[ "temurin" ]' - - maven4-enabled: true - - ff-goal: 'verify' - verify-goal: 'verify' - ff-site-run: false + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.experimental }} + strategy: + matrix: + os: [ubuntu-latest] + java: [ 17 ] + experimental: [false] + steps: + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0 + with: + distribution: adopt + java-version: ${{ matrix.java }} + - name: Build with Maven + run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify \ No newline at end of file diff --git a/.github/workflows/verify-maven-4.yml b/.github/workflows/verify-maven-4.yml new file mode 100644 index 000000000..5b05c4647 --- /dev/null +++ b/.github/workflows/verify-maven-4.yml @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java CI with Maven 4 + +on: + # Run CI on Pushes to "main"" or on pull requests targeting "main". + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build: + uses: apache/maven-gh-actions-shared/.github/workflows/maven-verify.yml@v4 + with: + os-matrix: '[ "ubuntu-latest" ]' + jdk-matrix: '[ "17" ]' + jdk-distribution-matrix: '[ "temurin" ]' + + maven4-enabled: true + + ff-goal: 'verify' + verify-goal: 'verify' + ff-site-run: false From f1f48d779256eb0ef14cc29c3f211e8bb84596f1 Mon Sep 17 00:00:00 2001 From: Markos Volikas Date: Sat, 22 Nov 2025 17:25:39 +0200 Subject: [PATCH 5/8] #1662 - build with maven 4 only --- .github/workflows/verify-maven-4.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/verify-maven-4.yml b/.github/workflows/verify-maven-4.yml index 5b05c4647..6c29043b2 100644 --- a/.github/workflows/verify-maven-4.yml +++ b/.github/workflows/verify-maven-4.yml @@ -32,7 +32,7 @@ jobs: jdk-matrix: '[ "17" ]' jdk-distribution-matrix: '[ "temurin" ]' - maven4-enabled: true + maven4-build: true ff-goal: 'verify' verify-goal: 'verify' From 2892e8cfa7f9ac0ffd50d0480840bac1f7156572 Mon Sep 17 00:00:00 2001 From: Markos Volikas Date: Sat, 22 Nov 2025 17:43:17 +0200 Subject: [PATCH 6/8] #1662 - install maven 4 in the workflow --- .github/workflows/verify-maven-4.yml | 51 ++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/.github/workflows/verify-maven-4.yml b/.github/workflows/verify-maven-4.yml index 6c29043b2..081f7c0a6 100644 --- a/.github/workflows/verify-maven-4.yml +++ b/.github/workflows/verify-maven-4.yml @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -16,7 +16,7 @@ name: Java CI with Maven 4 on: - # Run CI on Pushes to "main"" or on pull requests targeting "main". + # Run CI on pushes to "main" or on pull requests targeting "main". push: branches: - main @@ -25,15 +25,38 @@ on: - main jobs: - build: - uses: apache/maven-gh-actions-shared/.github/workflows/maven-verify.yml@v4 - with: - os-matrix: '[ "ubuntu-latest" ]' - jdk-matrix: '[ "17" ]' - jdk-distribution-matrix: '[ "temurin" ]' - - maven4-build: true - - ff-goal: 'verify' - verify-goal: 'verify' - ff-site-run: false + build-maven4: + runs-on: ubuntu-latest + + steps: + - name: Checkout sources + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + + - name: Set up JDK 17 + uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0 + with: + distribution: temurin + java-version: 17 + + - name: Cache local Maven repository + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven4-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven4- + + - name: Install Maven 4 + run: | + MAVEN_VERSION=4.0.0-rc-5 + BASE_URL="https://dlcdn.apache.org/maven/maven-4/${MAVEN_VERSION}/binaries" + curl -fsSL "${BASE_URL}/apache-maven-${MAVEN_VERSION}-bin.tar.gz" -o maven.tar.gz + mkdir -p "$HOME/maven" + tar -xzf maven.tar.gz -C "$HOME/maven" + echo "M2_HOME=$HOME/maven/apache-maven-${MAVEN_VERSION}" >> "$GITHUB_ENV" + echo "$HOME/maven/apache-maven-${MAVEN_VERSION}/bin" >> "$GITHUB_PATH" + rm maven.tar.gz + mvn -version + + - name: Build with Maven 4 + run: mvn --errors --batch-mode --show-version verify From 677d51b265b6e51d121210d5e3ddc181a0cd48c9 Mon Sep 17 00:00:00 2001 From: Markos Volikas Date: Sun, 23 Nov 2025 17:05:20 +0200 Subject: [PATCH 7/8] #1662 - fix subprojects for rc-5 --- .github/workflows/verify-maven-4.yml | 5 ++--- external/opensearch/pom.xml | 4 ---- external/pom.xml | 16 ++++++++++++++++ external/solr/pom.xml | 4 ---- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/.github/workflows/verify-maven-4.yml b/.github/workflows/verify-maven-4.yml index 081f7c0a6..8545c81de 100644 --- a/.github/workflows/verify-maven-4.yml +++ b/.github/workflows/verify-maven-4.yml @@ -35,7 +35,7 @@ jobs: - name: Set up JDK 17 uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0 with: - distribution: temurin + distribution: adopt java-version: 17 - name: Cache local Maven repository @@ -56,7 +56,6 @@ jobs: echo "M2_HOME=$HOME/maven/apache-maven-${MAVEN_VERSION}" >> "$GITHUB_ENV" echo "$HOME/maven/apache-maven-${MAVEN_VERSION}/bin" >> "$GITHUB_PATH" rm maven.tar.gz - mvn -version - name: Build with Maven 4 - run: mvn --errors --batch-mode --show-version verify + run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify diff --git a/external/opensearch/pom.xml b/external/opensearch/pom.xml index 85d4a8793..7773e3782 100644 --- a/external/opensearch/pom.xml +++ b/external/opensearch/pom.xml @@ -107,8 +107,4 @@ under the License. - - archetype - - diff --git a/external/pom.xml b/external/pom.xml index 561fc3df3..961ec9f73 100644 --- a/external/pom.xml +++ b/external/pom.xml @@ -26,6 +26,22 @@ under the License. stormcrawler-external pom + + ai + aws + langid + opensearch + opensearch/archetype + playwright + selenium + solr + solr/archetype + sql + tika + urlfrontier + warc + + org.apache.storm diff --git a/external/solr/pom.xml b/external/solr/pom.xml index 6431cc345..fdcd324bb 100644 --- a/external/solr/pom.xml +++ b/external/solr/pom.xml @@ -64,8 +64,4 @@ under the License. - - archetype - - From 79b71933c2c47ad70f95fb44c3906e233d303927 Mon Sep 17 00:00:00 2001 From: Markos Volikas Date: Sun, 23 Nov 2025 17:28:58 +0200 Subject: [PATCH 8/8] #1662 - use uniform indentation format, use maven 4 in maven.yml workflow --- .github/workflows/maven.yml | 20 +++++ .github/workflows/verify-maven-4.yml | 61 ------------- archetype/pom.xml | 94 ++++++++++---------- core/pom.xml | 1 - external/ai/pom.xml | 65 +++++++------- external/aws/pom.xml | 1 - external/langid/pom.xml | 1 - external/opensearch/archetype/pom.xml | 82 +++++++++--------- external/opensearch/pom.xml | 1 - external/playwright/pom.xml | 1 - external/pom.xml | 2 +- external/selenium/pom.xml | 1 - external/solr/archetype/pom.xml | 78 ++++++++--------- external/solr/pom.xml | 1 - external/sql/pom.xml | 1 - external/tika/pom.xml | 1 - external/urlfrontier/pom.xml | 1 - external/warc/pom.xml | 5 +- pom.xml | 119 +++++++++++++------------- 19 files changed, 239 insertions(+), 297 deletions(-) delete mode 100644 .github/workflows/verify-maven-4.yml diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 288a66786..fcd91ac85 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -38,6 +38,16 @@ jobs: with: distribution: adopt java-version: 17 + - name: Install Maven 4 + run: | + MAVEN_VERSION=4.0.0-rc-5 + BASE_URL="https://dlcdn.apache.org/maven/maven-4/${MAVEN_VERSION}/binaries" + curl -fsSL "${BASE_URL}/apache-maven-${MAVEN_VERSION}-bin.tar.gz" -o maven.tar.gz + mkdir -p "$HOME/maven" + tar -xzf maven.tar.gz -C "$HOME/maven" + echo "M2_HOME=$HOME/maven/apache-maven-${MAVEN_VERSION}" >> "$GITHUB_ENV" + echo "$HOME/maven/apache-maven-${MAVEN_VERSION}/bin" >> "$GITHUB_PATH" + rm maven.tar.gz - name: Build with Maven run: mvn -B --no-transfer-progress -Prat -DskipTests verify -Dskip.format.code=false @@ -63,5 +73,15 @@ jobs: with: distribution: adopt java-version: ${{ matrix.java }} + - name: Install Maven 4 + run: | + MAVEN_VERSION=4.0.0-rc-5 + BASE_URL="https://dlcdn.apache.org/maven/maven-4/${MAVEN_VERSION}/binaries" + curl -fsSL "${BASE_URL}/apache-maven-${MAVEN_VERSION}-bin.tar.gz" -o maven.tar.gz + mkdir -p "$HOME/maven" + tar -xzf maven.tar.gz -C "$HOME/maven" + echo "M2_HOME=$HOME/maven/apache-maven-${MAVEN_VERSION}" >> "$GITHUB_ENV" + echo "$HOME/maven/apache-maven-${MAVEN_VERSION}/bin" >> "$GITHUB_PATH" + rm maven.tar.gz - name: Build with Maven run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify \ No newline at end of file diff --git a/.github/workflows/verify-maven-4.yml b/.github/workflows/verify-maven-4.yml deleted file mode 100644 index 8545c81de..000000000 --- a/.github/workflows/verify-maven-4.yml +++ /dev/null @@ -1,61 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: Java CI with Maven 4 - -on: - # Run CI on pushes to "main" or on pull requests targeting "main". - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - build-maven4: - runs-on: ubuntu-latest - - steps: - - name: Checkout sources - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 - - - name: Set up JDK 17 - uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0 - with: - distribution: adopt - java-version: 17 - - - name: Cache local Maven repository - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven4-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven4- - - - name: Install Maven 4 - run: | - MAVEN_VERSION=4.0.0-rc-5 - BASE_URL="https://dlcdn.apache.org/maven/maven-4/${MAVEN_VERSION}/binaries" - curl -fsSL "${BASE_URL}/apache-maven-${MAVEN_VERSION}-bin.tar.gz" -o maven.tar.gz - mkdir -p "$HOME/maven" - tar -xzf maven.tar.gz -C "$HOME/maven" - echo "M2_HOME=$HOME/maven/apache-maven-${MAVEN_VERSION}" >> "$GITHUB_ENV" - echo "$HOME/maven/apache-maven-${MAVEN_VERSION}/bin" >> "$GITHUB_PATH" - rm maven.tar.gz - - - name: Build with Maven 4 - run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify diff --git a/archetype/pom.xml b/archetype/pom.xml index de83e5045..7aa999942 100644 --- a/archetype/pom.xml +++ b/archetype/pom.xml @@ -20,51 +20,51 @@ under the License. --> - - - - stormcrawler-archetype - stormcrawler-archetype - - - - src/main/resources - true - - archetype-resources/pom.xml - archetype-resources/crawler-conf.yaml - - - - src/main/resources - false - - archetype-resources/pom.xml - - - - - - org.apache.maven.archetype - archetype-packaging - 3.4.1 - - - - - - maven-archetype-plugin - 3.4.1 - - - org.apache.maven.plugins - maven-resources-plugin - 3.3.1 - - \ - - - - - + + + + stormcrawler-archetype + stormcrawler-archetype + + + + src/main/resources + true + + archetype-resources/pom.xml + archetype-resources/crawler-conf.yaml + + + + src/main/resources + false + + archetype-resources/pom.xml + + + + + + org.apache.maven.archetype + archetype-packaging + 3.4.1 + + + + + + maven-archetype-plugin + 3.4.1 + + + org.apache.maven.plugins + maven-resources-plugin + 3.3.1 + + \ + + + + + diff --git a/core/pom.xml b/core/pom.xml index 428923f86..98f3fe8b4 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -20,7 +20,6 @@ under the License. --> - diff --git a/external/ai/pom.xml b/external/ai/pom.xml index a34c9fff9..900056bb4 100644 --- a/external/ai/pom.xml +++ b/external/ai/pom.xml @@ -20,38 +20,37 @@ under the License. --> - - - - - stormcrawler-ai - stormcrawler-ai - - https://github.com/apache/stormcrawler/tree/master/external/ai - AI resources for StormCrawler - - - 1.8.0 - 1.8.0 - - - - - dev.langchain4j - langchain4j - ${langchain4j.version} - - - org.apache.opennlp - opennlp-tools - - - - - dev.langchain4j - langchain4j-open-ai - ${langchain4j.openai.version} - - + + + + stormcrawler-ai + stormcrawler-ai + + https://github.com/apache/stormcrawler/tree/master/external/ai + AI resources for StormCrawler + + + 1.8.0 + 1.8.0 + + + + + dev.langchain4j + langchain4j + ${langchain4j.version} + + + org.apache.opennlp + opennlp-tools + + + + + dev.langchain4j + langchain4j-open-ai + ${langchain4j.openai.version} + + \ No newline at end of file diff --git a/external/aws/pom.xml b/external/aws/pom.xml index 29de76d5e..d1999a81b 100644 --- a/external/aws/pom.xml +++ b/external/aws/pom.xml @@ -21,7 +21,6 @@ under the License. - stormcrawler-aws diff --git a/external/langid/pom.xml b/external/langid/pom.xml index 1cf87fb8e..04549c477 100644 --- a/external/langid/pom.xml +++ b/external/langid/pom.xml @@ -21,7 +21,6 @@ under the License. - stormcrawler-langid diff --git a/external/opensearch/archetype/pom.xml b/external/opensearch/archetype/pom.xml index aafdc9b0c..2f77edbfa 100644 --- a/external/opensearch/archetype/pom.xml +++ b/external/opensearch/archetype/pom.xml @@ -20,50 +20,48 @@ under the License. --> - + + + ../../.. + - - ../../.. - + stormcrawler-opensearch-archetype + + maven-archetype - stormcrawler-opensearch-archetype - - maven-archetype + + + + src/main/resources + true + + META-INF/maven/archetype-metadata.xml + + + + src/main/resources + false + + META-INF/maven/archetype-metadata.xml + + + - + + + org.apache.maven.archetype + archetype-packaging + 3.4.1 + + - - - src/main/resources - true - - META-INF/maven/archetype-metadata.xml - - - - src/main/resources - false - - META-INF/maven/archetype-metadata.xml - - - - - - - org.apache.maven.archetype - archetype-packaging - 3.4.1 - - - - - - - maven-archetype-plugin - 3.4.1 - - - - + + + + maven-archetype-plugin + 3.4.1 + + + + diff --git a/external/opensearch/pom.xml b/external/opensearch/pom.xml index 7773e3782..d2be2f67f 100644 --- a/external/opensearch/pom.xml +++ b/external/opensearch/pom.xml @@ -20,7 +20,6 @@ under the License. --> - diff --git a/external/playwright/pom.xml b/external/playwright/pom.xml index 5c3285453..8a03d70c1 100644 --- a/external/playwright/pom.xml +++ b/external/playwright/pom.xml @@ -21,7 +21,6 @@ under the License. - stormcrawler-playwright diff --git a/external/pom.xml b/external/pom.xml index 961ec9f73..2cdb06ec8 100644 --- a/external/pom.xml +++ b/external/pom.xml @@ -42,7 +42,7 @@ under the License. warc - + org.apache.storm storm-client diff --git a/external/selenium/pom.xml b/external/selenium/pom.xml index 057a9315e..325e26ced 100644 --- a/external/selenium/pom.xml +++ b/external/selenium/pom.xml @@ -21,7 +21,6 @@ under the License. - stormcrawler-selenium diff --git a/external/solr/archetype/pom.xml b/external/solr/archetype/pom.xml index 5d855a41f..200afd18b 100644 --- a/external/solr/archetype/pom.xml +++ b/external/solr/archetype/pom.xml @@ -20,50 +20,48 @@ under the License. --> - - - ../../.. - + + ../../.. + - stormcrawler-solr-archetype + stormcrawler-solr-archetype - maven-archetype + maven-archetype - + + + + src/main/resources + true + + META-INF/maven/archetype-metadata.xml + + + + src/main/resources + false + + META-INF/maven/archetype-metadata.xml + + + - - - src/main/resources - true - - META-INF/maven/archetype-metadata.xml - - - - src/main/resources - false - - META-INF/maven/archetype-metadata.xml - - - + + + org.apache.maven.archetype + archetype-packaging + 3.4.1 + + - - - org.apache.maven.archetype - archetype-packaging - 3.4.1 - - - - - - - maven-archetype-plugin - 3.4.1 - - - - + + + + maven-archetype-plugin + 3.4.1 + + + + diff --git a/external/solr/pom.xml b/external/solr/pom.xml index fdcd324bb..e4d734b8d 100644 --- a/external/solr/pom.xml +++ b/external/solr/pom.xml @@ -21,7 +21,6 @@ under the License. - stormcrawler-solr diff --git a/external/sql/pom.xml b/external/sql/pom.xml index 630c8aa36..d0ce4285e 100644 --- a/external/sql/pom.xml +++ b/external/sql/pom.xml @@ -21,7 +21,6 @@ under the License. - stormcrawler-sql diff --git a/external/tika/pom.xml b/external/tika/pom.xml index 14f74c030..6d1972b03 100644 --- a/external/tika/pom.xml +++ b/external/tika/pom.xml @@ -20,7 +20,6 @@ under the License. --> - diff --git a/external/urlfrontier/pom.xml b/external/urlfrontier/pom.xml index 6a18aa26d..eba8b7ec1 100644 --- a/external/urlfrontier/pom.xml +++ b/external/urlfrontier/pom.xml @@ -20,7 +20,6 @@ under the License. --> - diff --git a/external/warc/pom.xml b/external/warc/pom.xml index 858dfc941..0a265219b 100644 --- a/external/warc/pom.xml +++ b/external/warc/pom.xml @@ -21,7 +21,6 @@ under the License. - stormcrawler-warc @@ -58,12 +57,12 @@ under the License. ${storm-client.version} - + jdk.tools jdk.tools - + org.apache.hive.hcatalog hive-webhcat-java-client diff --git a/pom.xml b/pom.xml index c342ce93c..94535b4e5 100644 --- a/pom.xml +++ b/pom.xml @@ -21,7 +21,6 @@ under the License. - org.apache apache @@ -73,8 +72,8 @@ under the License. 2.0.17 26.0.2-1 2.17.0 - 1.28.0 - 1.20.0 + 1.28.0 + 1.20.0 5.4 1.21.3 2.7.0 @@ -467,34 +466,34 @@ under the License. - - org.apache.maven.plugins - maven-checkstyle-plugin - ${checkstyle-maven-plugin.version} - - - com.puppycrawl.tools - checkstyle - ${checkstyle.version} - - - - checkstyle.xml - - - - - check - - - - + + org.apache.maven.plugins + maven-checkstyle-plugin + ${checkstyle-maven-plugin.version} + + + com.puppycrawl.tools + checkstyle + ${checkstyle.version} + + + + checkstyle.xml + + + + + check + + + + + that are not checked into Git --> rat @@ -515,33 +514,33 @@ under the License. ${project.basedir}/rat **/*.ndjson - **/*.mapping - **/*.flux - **/*.txt - **/*.rss - **/*.tar.gz - **/README.md - **/target/** - **/warc.inputs - **/llm-default-prompt.txt - LICENSE - NOTICE - CONTRIBUTING.md - RELEASING.md + **/*.mapping + **/*.flux + **/*.txt + **/*.rss + **/*.tar.gz + **/README.md + **/target/** + **/warc.inputs + **/llm-default-prompt.txt + LICENSE + NOTICE + CONTRIBUTING.md + RELEASING.md external/opensearch/dashboards/** external/solr/archetype/src/main/resources/archetype-resources/configsets/** - THIRD-PARTY.properties - THIRD-PARTY.txt + THIRD-PARTY.properties + THIRD-PARTY.txt .github/ISSUE_TEMPLATE/*.yml - .github/*.md - .mvn/*.config - .gitattributes - **/dependency-reduced-pom.xml - .editorconfig - **/.settings/**/* - **/.classpath - **/.project - **/.idea + .github/*.md + .mvn/*.config + .gitattributes + **/dependency-reduced-pom.xml + .editorconfig + **/.settings/**/* + **/.classpath + **/.project + **/.idea @@ -660,17 +659,17 @@ under the License. import - - org.apache.commons - commons-compress - ${commons.compress.version} - + + org.apache.commons + commons-compress + ${commons.compress.version} + - - commons-codec - commons-codec - ${commons.codec.version} - + + commons-codec + commons-codec + ${commons.codec.version} +