Skip to content
This repository was archived by the owner on Oct 20, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,22 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up Scala
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: 11
java-version: 21
cache: sbt

- uses: sbt/setup-sbt@v1

- name: Show output of sed to build.sbt command for logs
run: |
VERS_TAG=$(echo $GIT_REF | sed 's/refs\/tags\/v//g')
VERS_TAG="\"$VERS_TAG"\"
sed "s/version := *.*/version := $VERS_TAG/" build.sbt

- name: Write Tag to Scala Build Version
run: |
VERS_TAG=$(echo $GIT_REF | sed 's/refs\/tags\/v//g')
Expand All @@ -44,11 +49,15 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Submit SBT dependencies to the GitHub dependency graph.
- name: Sbt Dependency Submission
uses: scalacenter/sbt-dependency-submission@v3.1.0

- name: Build Universal Artifact
run: sbt Universal/packageBin

- name: Archive package and documentation
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: csvw-check-universal
path: target/universal/csvw-check-*.zip
Expand All @@ -68,7 +77,7 @@ jobs:
rel_cand_substring='rc'
vers_num_lower_case=${vers_num,,}

local_param="csvwcheck:$vers_num"
local_param="csvw-check:$vers_num"
remote_param="gsscogs/csvw-check"
push_param="$remote_param:v$vers_num"
latest_param="$remote_param:latest"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/reusable-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
test_in_environments:
runs-on: ${{ inputs.os }}
container:
image: sbtscala/scala-sbt:graalvm-ce-22.3.0-b2-java17_1.8.3_2.13.10
image: sbtscala/scala-sbt:graalvm-ce-22.3.3-b1-java17_1.10.7_3.6.3
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -24,7 +24,7 @@ jobs:

- name: Archive test results from xml files
if: always()
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.os }} test results
path: |
Expand All @@ -35,7 +35,7 @@ jobs:
runs-on: ubuntu-latest
if: always()
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: ${{ inputs.os }} test results

Expand Down
43 changes: 24 additions & 19 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,43 +1,48 @@
import com.typesafe.sbt.packager.docker.Cmd

name := "csvw-check"

organization := "io.github.gss-cogs"
version := "0.0.3"
maintainer := "csvcubed@gsscogs.uk"

scalaVersion := "2.13.4"
scalaVersion := "2.13.16"
scalacOptions ++= Seq("-deprecation", "-feature")
autoCompilerPlugins := true

enablePlugins(JavaAppPackaging)
enablePlugins(DockerPlugin)
enablePlugins(UniversalPlugin)
enablePlugins(DockerPlugin)
enablePlugins(AshScriptPlugin)

dockerBaseImage := "openjdk:11"
dockerEntrypoint := Seq("bash")
dockerBaseImage := "eclipse-temurin:23-jre-alpine"
dockerEntrypoint := Seq("/opt/docker/bin/csvw-check")
dockerEnvVars := Map("PATH" -> "$PATH:/opt/docker/bin")
Docker / packageName := "csvwcheck"
Docker / packageName := "csvw-check"

libraryDependencies += "io.cucumber" %% "cucumber-scala" % "8.14.1" % Test
libraryDependencies += "io.cucumber" % "cucumber-junit" % "7.11.1" % Test
libraryDependencies += "io.cucumber" %% "cucumber-scala" % "8.26.1" % Test
libraryDependencies += "io.cucumber" % "cucumber-junit" % "7.21.1" % Test
libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % Test
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.15" % Test
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.19" % Test
libraryDependencies += "com.github.pathikrit" %% "better-files" % "3.9.2" % Test

libraryDependencies += "org.scala-lang.modules" %% "scala-parser-combinators" % "1.1.2"
libraryDependencies += "io.spray" %% "spray-json" % "1.3.6"
libraryDependencies += "org.apache.jena" % "jena-arq" % "4.4.0"
libraryDependencies += "joda-time" % "joda-time" % "2.12.2"
libraryDependencies += "org.apache.jena" % "jena-arq" % "5.3.0"
libraryDependencies += "joda-time" % "joda-time" % "2.13.1"
libraryDependencies += "com.github.scopt" %% "scopt" % "4.1.0"
// Akka releases after 2.6.21 require a license key, which doesn't fit the use case of this OSS project at all.
// Unfortunately, it looks like we will want to stop using Akka entirely, which would require quite a bit of work.
libraryDependencies += "com.typesafe.akka" %% "akka-stream" % "2.6.21"
libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.5.3"
libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.5.16"
libraryDependencies += "com.typesafe.scala-logging" %% "scala-logging" % "3.9.5"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-databind" % "2.14.2"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-annotations" % "2.14.2"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-core" % "2.14.2"
libraryDependencies += "com.softwaremill.sttp.client3" %% "core" % "3.8.15"
libraryDependencies += "com.ibm.icu" % "icu4j" % "72.1"
libraryDependencies += "org.apache.commons" % "commons-csv" % "1.10.0"
libraryDependencies += "com.chuusai" %% "shapeless" % "2.3.10"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-databind" % "2.18.2"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-annotations" % "2.18.2"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-core" % "2.18.2"
libraryDependencies += "com.softwaremill.sttp.client3" %% "core" % "3.10.3"
libraryDependencies += "com.ibm.icu" % "icu4j" % "76.1"
libraryDependencies += "org.apache.commons" % "commons-csv" % "1.13.0"
libraryDependencies += "com.chuusai" %% "shapeless" % "2.3.12"

publishTo := Some("GitHub Maven package repo for GSS-Cogs" at "https://maven.pkg.github.com/gss-cogs/csvw-check")
publishMavenStyle := true
Expand All @@ -50,4 +55,4 @@ credentials += Credentials(

organizationName := "Crown Copyright (Office for National Statistics)"
startYear := Some(2020)
licenses += ("Apache-2.0", new URL("https://www.apache.org/licenses/LICENSE-2.0.txt"))
licenses += ("Apache-2.0", new URI("https://www.apache.org/licenses/LICENSE-2.0.txt").toURL)
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.5.8
sbt.version=1.10.7
2 changes: 1 addition & 1 deletion project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
addSbtPlugin("org.jetbrains" % "sbt-ide-settings" % "1.1.0")
addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.8.0")
addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.11.1")
11 changes: 8 additions & 3 deletions src/main/scala/csvwcheck/ConfiguredObjectMapper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@

package csvwcheck

import com.fasterxml.jackson.databind.node.JsonNodeFactory
import com.fasterxml.jackson.databind.cfg.JsonNodeFeature
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}

object ConfiguredObjectMapper {
val objectMapper = new ObjectMapper()
objectMapper.setNodeFactory(JsonNodeFactory.withExactBigDecimals(true))

objectMapper.configure(
DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS,
JsonNodeFeature.STRIP_TRAILING_BIGDECIMAL_ZEROES,
true
)

objectMapper.configure(
DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS,
true
)
}
21 changes: 17 additions & 4 deletions src/main/scala/csvwcheck/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ object Main extends App {

implicit val actorSystem: ActorSystem = ActorSystem("actor-system")


val numParallelThreads: Int = sys.env.get("PARALLELISM") match {
case Some(value) => value.toInt
case None => Runtime.getRuntime.availableProcessors()
Expand Down Expand Up @@ -116,21 +115,35 @@ object Main extends App {
val rootLogger = Logger("ROOT")
val underlyingLogger = rootLogger.underlying.asInstanceOf[ch.qos.logback.classic.Logger]
underlyingLogger.setLevel(logLevel)

val akkaLogLevel = logLevel match {
case Level.WARN => "WARNING"
case Level.TRACE => "DEBUG"
case Level.OFF|Level.INFO|Level.DEBUG|Level.ERROR => logLevel.toString
}
System.setProperty("akka.loglevel", akkaLogLevel)

rootLogger
}

private def getDescriptionForMessage(
errorMessage: MessageWithCsvContext
): String = {
val message = new StringBuilder()

message.append(s"Type: ${errorMessage.`type`}")

errorMessage.csvFilePath
.foreach(csvFilePath => message.append(s" in CSV '$csvFilePath'"))

if (errorMessage.row.nonEmpty) {
message.append(s"Row: ${errorMessage.row}$newLine")
message.append(s", Row: ${errorMessage.row}")
}
if (errorMessage.column.nonEmpty) {
message.append(s", Column: ${errorMessage.column}$newLine")
message.append(s", Column: '${errorMessage.column}'")
}
if (errorMessage.content.nonEmpty) {
message.append(s": ${errorMessage.content}$newLine")
message.append(s"$newLine${errorMessage.content}$newLine")
}

message.toString()
Expand Down
6 changes: 2 additions & 4 deletions src/main/scala/csvwcheck/Validator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@ class Validator(
val metadataJsonLocation =
csvwLinkHeaderRegEx.replaceAllIn(header, "$1")
// Now make the URL absolute if it isn't already.
new URL(
new URL(getUriWithoutQueryString(csvUri).toString),
metadataJsonLocation
).toURI
getUriWithoutQueryString(csvUri)
.resolve(metadataJsonLocation)
})
} else {
None
Expand Down
8 changes: 6 additions & 2 deletions src/main/scala/csvwcheck/errors/MessageWithCsvContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ abstract class MessageWithCsvContext {
def content: String

def constraints: String

def csvFilePath: Option[String]
}

case class ErrorWithCsvContext(
Expand All @@ -39,7 +41,8 @@ case class ErrorWithCsvContext(
row: String,
column: String,
content: String,
constraints: String
constraints: String,
csvFilePath: Option[String] = None
) extends MessageWithCsvContext {}

case class WarningWithCsvContext(
Expand All @@ -48,5 +51,6 @@ case class WarningWithCsvContext(
row: String,
column: String,
content: String,
constraints: String
constraints: String,
csvFilePath: Option[String] = None
) extends MessageWithCsvContext {}
14 changes: 8 additions & 6 deletions src/main/scala/csvwcheck/models/Column.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import csvwcheck.errors.{ErrorWithCsvContext, ErrorWithoutContext, MetadataError
import csvwcheck.models
import csvwcheck.models.Column._
import csvwcheck.models.ParseResult.ParseResult
import csvwcheck.models.Values.ColumnValue
import csvwcheck.normalisation.Constants.undefinedLanguage
import csvwcheck.normalisation.Utils.parseNodeAsText
import csvwcheck.numberformatparser.LdmlNumberFormatParser
Expand All @@ -33,6 +34,7 @@ import org.joda.time.{DateTime, DateTimeZone}

import java.math.BigInteger
import java.time.{LocalDateTime, Month, ZoneId, ZonedDateTime}
import java.util.regex.Pattern
import scala.collection.mutable.ArrayBuffer
import scala.jdk.CollectionConverters.IteratorHasAsScala
import scala.math.BigInt.javaBigInteger2bigInt
Expand Down Expand Up @@ -1135,7 +1137,7 @@ case class Column private(

def validate(
value: String
): (Array[ErrorWithoutContext], List[Any]) = {
): (Array[ErrorWithoutContext], ColumnValue) = {
val errors = ArrayBuffer.empty[ErrorWithoutContext]
if (nullParam.contains(value)) {
// Since the cell value is among the null values specified for this CSV-W, it can be considered as the default null value which is ""
Expand All @@ -1145,9 +1147,9 @@ case class Column private(
}
(errors.toArray, List.empty)
} else {
val valuesArrayToReturn = ArrayBuffer.empty[Any]
val parsedColumnValues = ArrayBuffer.empty[Any]
val values = separator match {
case Some(separator) => value.split(separator)
case Some(separator) => value.split(Pattern.quote(separator))
case None => Array[String](value)
}
val parserForDataType = datatypeParser(baseDataType)
Expand All @@ -1160,7 +1162,7 @@ case class Column private(
s"'$v' - ${errorMessageContent.content} (${format.flatMap(_.pattern).getOrElse("no format provided")})"
)
)
valuesArrayToReturn.addOne(s"invalid - $v")
parsedColumnValues.addOne(s"invalid - $v")
case Right(s) =>
errors.addAll(validateLength(s.toString))
errors.addAll(validateValue(s))
Expand All @@ -1174,11 +1176,11 @@ case class Column private(
}

if (errors.isEmpty) {
valuesArrayToReturn.addOne(s)
parsedColumnValues.addOne(s)
}
}
}
(errors.toArray, valuesArrayToReturn.toList)
(errors.toArray, parsedColumnValues.toList)
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/csvwcheck/models/ForeignKeyDefinition.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode
case class ForeignKeyDefinition(
jsonObject: ObjectNode,
localColumns: Array[Column]
) {
) extends Key {
override def toString: String =
s"ForeignKeyDefinition([${localColumns.map(_.name.getOrElse("unnamed column")).mkString(", ")}])"

Expand Down
8 changes: 8 additions & 0 deletions src/main/scala/csvwcheck/models/Key.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package csvwcheck.models

/**
 * Common base type for primary-key and foreign-key models.
 *
 * Declares no members of its own; it exists only to be extended.
 */
abstract class Key
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@

package csvwcheck.models

case class KeyWithContext(
rowNumber: Long,
keyValues: List[Any],
var isDuplicate: Boolean = false
import csvwcheck.models.Values.KeyValue

case class KeyValueWithContext(
rowNumber: Long,
keyValue: KeyValue,
var isDuplicate: Boolean = false
) {

/**
Expand All @@ -29,13 +31,13 @@ case class KeyWithContext(
*/
override def equals(obj: Any): Boolean =
obj != null &&
obj.isInstanceOf[KeyWithContext] &&
this.keyValues.equals(obj.asInstanceOf[KeyWithContext].keyValues)
obj.isInstanceOf[KeyValueWithContext] &&
this.keyValue.equals(obj.asInstanceOf[KeyValueWithContext].keyValue)

override def hashCode(): Int = this.keyValues.hashCode()
override def hashCode(): Int = this.keyValue.hashCode()

def keyValuesToString(): String = {
val stringList = keyValues.map {
val stringList = keyValue.map {
case listOfAny: List[Any] =>
listOfAny.map(s => s.toString).mkString(",")
case i => i.toString
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ case class ReferencedTableForeignKeyReference(
* The table the foreign key was defined on.
*/
definitionTable: Table
) {
) extends Key {
override def toString: String =
s"ReferencedTableForeignKeyReference($definitionTable.[${
foreignKeyDefinition.localColumns
Expand Down
Loading