diff --git a/.cursorrules b/.cursor/rules similarity index 100% rename from .cursorrules rename to .cursor/rules diff --git a/.github/workflows/publish-instrumentation-openai.yml b/.github/workflows/publish-instrumentation-openai.yml new file mode 100644 index 0000000..8e44762 --- /dev/null +++ b/.github/workflows/publish-instrumentation-openai.yml @@ -0,0 +1,44 @@ +name: Publish judgeval-instrumentation-openai + +on: + workflow_dispatch: + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Java and Maven Central Repository + uses: actions/setup-java@v4 + with: + java-version: "21" + distribution: "temurin" + server-id: central + server-username: MAVEN_USERNAME + server-password: MAVEN_PASSWORD + + - name: Import GPG key + run: | + echo "${{ secrets.GPG_PRIVATE_KEY }}" | base64 --decode | gpg --import --batch --yes + echo "use-agent" >> ~/.gnupg/gpg.conf + echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf + echo "allow-loopback-pinentry" >> ~/.gnupg/gpg-agent.conf + gpg-connect-agent reloadagent /bye + gpg --list-secret-keys --keyid-format LONG + + - name: Deploy judgeval-instrumentation-openai + run: | + mvn -B -pl instrumentation/judgeval-instrumentation-openai -am -e deploy \ + -Dgpg.passphrase="${{ secrets.GPG_PASSPHRASE }}" \ + -Dgpg.pinentry-mode=loopback \ + --update-snapshots + env: + MAVEN_USERNAME: ${{ secrets.CENTRAL_PORTAL_USERNAME }} + MAVEN_PASSWORD: ${{ secrets.CENTRAL_PORTAL_TOKEN }} + GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} + + - name: Done + run: echo "Published judgeval-instrumentation-openai" + + diff --git a/.github/workflows/publish-judgeval-java.yml b/.github/workflows/publish-judgeval-java.yml new file mode 100644 index 0000000..5638b28 --- /dev/null +++ b/.github/workflows/publish-judgeval-java.yml @@ -0,0 +1,42 @@ +name: Publish judgeval-java + +on: + workflow_dispatch: + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Java and 
Maven Central Repository + uses: actions/setup-java@v4 + with: + java-version: "21" + distribution: "temurin" + server-id: central + server-username: MAVEN_USERNAME + server-password: MAVEN_PASSWORD + + - name: Import GPG key + run: | + echo "${{ secrets.GPG_PRIVATE_KEY }}" | base64 --decode | gpg --import --batch --yes + echo "use-agent" >> ~/.gnupg/gpg.conf + echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf + echo "allow-loopback-pinentry" >> ~/.gnupg/gpg-agent.conf + gpg-connect-agent reloadagent /bye + gpg --list-secret-keys --keyid-format LONG + + - name: Deploy judgeval-java + run: | + mvn -B -pl judgeval-java -am -e deploy \ + -Dgpg.passphrase="${{ secrets.GPG_PASSPHRASE }}" \ + -Dgpg.pinentry-mode=loopback \ + --update-snapshots + env: + MAVEN_USERNAME: ${{ secrets.CENTRAL_PORTAL_USERNAME }} + MAVEN_PASSWORD: ${{ secrets.CENTRAL_PORTAL_TOKEN }} + GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} + + - name: Done + run: echo "Published judgeval-java" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index 4102176..0000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,87 +0,0 @@ -name: Publish package to Maven Central via Central Portal -on: - workflow_dispatch: - release: - types: [created] - push: - branches: [main] -jobs: - test-install: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Java - uses: actions/setup-java@v4 - with: - java-version: "21" - distribution: "temurin" - - - name: Cache Maven packages - uses: actions/cache@v3 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 - - - name: Test Install - run: | - mvn clean install -Dgpg.skip=true - - publish: - needs: test-install - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Java and Maven Central Repository - uses: actions/setup-java@v4 - with: - java-version: "21" - distribution: "temurin" - server-id: central 
- server-username: MAVEN_USERNAME - server-password: MAVEN_PASSWORD - - - name: Verify POM groupId - run: | - echo "Checking pom.xml for correct groupId..." - grep -E ".*" pom.xml | head -1 - echo "Expected: com.judgmentlabs" - - - name: Show Maven coordinates - run: | - G=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.groupId) - A=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.artifactId) - V=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version) - echo "GAV: ${G}:${A}:${V}" - - - name: Clean Maven cache - run: | - rm -rf ~/.m2/repository/com/judgment - rm -rf ~/.m2/repository/com/judgmentlabs - - - name: Import GPG key - run: | - echo "${{ secrets.GPG_PRIVATE_KEY }}" | base64 --decode | gpg --import --batch --yes - echo "use-agent" >> ~/.gnupg/gpg.conf - echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf - echo "allow-loopback-pinentry" >> ~/.gnupg/gpg-agent.conf - gpg-connect-agent reloadagent /bye - gpg --list-secret-keys --keyid-format LONG - - - name: Clean and Deploy to Central Portal - run: | - mvn clean - mvn -e --batch-mode deploy \ - -Dgpg.passphrase="${{ secrets.GPG_PASSPHRASE }}" \ - -Dgpg.pinentry-mode=loopback \ - --update-snapshots - env: - MAVEN_USERNAME: ${{ secrets.CENTRAL_PORTAL_USERNAME }} - MAVEN_PASSWORD: ${{ secrets.CENTRAL_PORTAL_TOKEN }} - GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} - - - name: Deployment Status - run: | - echo "Deployment completed - check https://central.sonatype.com for status" diff --git a/.github/workflows/test-instrumentation-openai.yml b/.github/workflows/test-instrumentation-openai.yml new file mode 100644 index 0000000..08252f8 --- /dev/null +++ b/.github/workflows/test-instrumentation-openai.yml @@ -0,0 +1,39 @@ +name: Test judgeval-instrumentation-openai + +on: + pull_request: + branches: [main] + paths: + - "instrumentation/judgeval-instrumentation-openai/**" + - "pom.xml" + - ".github/workflows/test-instrumentation-openai.yml" + +jobs: + test-install: + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Java + uses: actions/setup-java@v4 + with: + java-version: "21" + distribution: "temurin" + + - name: Cache Maven packages + uses: actions/cache@v3 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + + - name: Test Install + run: | + mvn -B -Dgpg.skip=true -pl instrumentation/judgeval-instrumentation-openai -am clean install + + - name: Verify Build Artifacts + run: | + echo "Checking build artifacts..." + ls -la instrumentation/judgeval-instrumentation-openai/target/ || true + echo "JAR files:" + find instrumentation/judgeval-instrumentation-openai/target -name "*.jar" -type f diff --git a/.github/workflows/test-install.yml b/.github/workflows/test-judgeval-java.yml similarity index 68% rename from .github/workflows/test-install.yml rename to .github/workflows/test-judgeval-java.yml index ee0acf6..76fb936 100644 --- a/.github/workflows/test-install.yml +++ b/.github/workflows/test-judgeval-java.yml @@ -1,8 +1,12 @@ -name: Test Install +name: Test judgeval-java on: pull_request: branches: [main] + paths: + - "judgeval-java/**" + - "pom.xml" + - ".github/workflows/test-judgeval-java.yml" jobs: test-install: @@ -25,11 +29,11 @@ jobs: - name: Test Install run: | - mvn clean install -Dgpg.skip=true + mvn -B -Dgpg.skip=true -pl judgeval-java -am clean install - name: Verify Build Artifacts run: | echo "Checking build artifacts..." 
- ls -la target/ + ls -la judgeval-java/target/ || true echo "JAR files:" - find target/ -name "*.jar" -type f + find judgeval-java/target -name "*.jar" -type f diff --git a/Makefile b/Makefile index 4a7a5fc..d0ce4c7 100644 --- a/Makefile +++ b/Makefile @@ -1,43 +1,91 @@ -.PHONY: help format check test clean build generate-client +.PHONY: format format-core format-openai install install-core install-openai status status-core status-openai check test clean build generate-client run -help: ## Show this help message - @echo "Available commands:" - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' +format: + @echo "[format] judgeval-java" + mvn -B -pl judgeval-java -am spotless:apply + @echo "[format] instrumentation/judgeval-instrumentation-openai" + mvn -B -pl instrumentation/judgeval-instrumentation-openai -am spotless:apply -format: ## Format code using Spotless - mvn spotless:apply +format-core: + mvn -B -pl judgeval-java -am spotless:apply -check: ## Run all code quality checks - mvn compile checkstyle:check spotless:check +format-openai: + mvn -B -pl instrumentation/judgeval-instrumentation-openai -am spotless:apply -test: ## Run tests +check: + mvn -B compile checkstyle:check spotless:check + +test: mvn test -clean: ## Clean build artifacts +clean: mvn clean -build: ## Build the project - mvn clean compile +build: + mvn -B clean compile + +install: + @echo "[install] judgeval-java" + mvn -B -Dgpg.skip=true -pl judgeval-java -am clean install + @echo "[install] instrumentation/judgeval-instrumentation-openai" + mvn -B -Dgpg.skip=true -pl instrumentation/judgeval-instrumentation-openai -am clean install + +install-core: + mvn -B -Dgpg.skip=true -pl judgeval-java -am clean install -install: ## Install the project - mvn clean install -Dgpg.skip=true +install-openai: + mvn -B -Dgpg.skip=true -pl instrumentation/judgeval-instrumentation-openai -am clean install -generate-client: 
## Generate API client from OpenAPI spec +generate-client: ./scripts/generate-client.sh make format -lint: ## Run linting only - mvn checkstyle:check +lint: + mvn -B checkstyle:check + +format-check: + mvn -B spotless:check + +ci: + mvn -B clean compile test checkstyle:check spotless:check + +status: + $(MAKE) status-core + $(MAKE) status-openai + +status-core: + @echo "[status] judgeval-java" + @G=$$(mvn -q -pl judgeval-java -DforceStdout help:evaluate -Dexpression=project.groupId); \ + A=$$(mvn -q -pl judgeval-java -DforceStdout help:evaluate -Dexpression=project.artifactId); \ + V=$$(mvn -q -pl judgeval-java -DforceStdout help:evaluate -Dexpression=project.version); \ + echo "GAV: $$G:$$A:$$V"; \ + ls -1 judgeval-java/target/*.jar 2>/dev/null || echo "No jar built" + +status-openai: + @echo "[status] instrumentation/judgeval-instrumentation-openai" + @G=$$(mvn -q -pl instrumentation/judgeval-instrumentation-openai -DforceStdout help:evaluate -Dexpression=project.groupId); \ + A=$$(mvn -q -pl instrumentation/judgeval-instrumentation-openai -DforceStdout help:evaluate -Dexpression=project.artifactId); \ + V=$$(mvn -q -pl instrumentation/judgeval-instrumentation-openai -DforceStdout help:evaluate -Dexpression=project.version); \ + echo "GAV: $$G:$$A:$$V"; \ + ls -1 instrumentation/judgeval-instrumentation-openai/target/*.jar 2>/dev/null || echo "No jar built" + +MAIN ?= + +ifneq (,$(filter run,$(MAKECMDGOALS))) +EXAMPLE := $(word 2,$(MAKECMDGOALS)) +ifeq ($(EXAMPLE),) +$(error Usage: make run [MAIN=ClassName]) +endif +$(eval $(EXAMPLE):;@:) +endif -format-check: ## Check formatting without applying - mvn spotless:check -ci: ## Run CI checks (compile, test, checkstyle, spotless) - mvn clean compile test checkstyle:check spotless:check -run: ## Run a specific Java class with environment variables (usage: make run CLASS=com.example.MyClass) - @if [ -f .env ]; then \ - export $$(grep -v '^#' .env | grep -v '^$$' | xargs) && mvn exec:java -Dexec.mainClass="$(CLASS)"; \ 
- else \ - mvn exec:java -Dexec.mainClass="$(CLASS)"; \ - fi +run: + @echo "[run] examples.$(EXAMPLE)" + if [ -f .env ]; then export $$(grep -v '^#' .env | grep -v '^$$' | xargs); fi; \ + MAIN_CLASS=$(MAIN); \ + if [ -z "$$MAIN_CLASS" ]; then \ + MAIN_CLASS=$$(ls examples/src/main/java/examples/$(EXAMPLE)/*.java | head -n1 | xargs -n1 basename | sed 's/\.java$$//'); \ + fi; \ + mvn -q -f examples/pom.xml -DskipTests -Dexec.cleanupDaemonThreads=false -Dexec.mainClass=examples.$(EXAMPLE).$$MAIN_CLASS clean compile exec:java diff --git a/README.md b/README.md index 84a3233..92f27db 100644 --- a/README.md +++ b/README.md @@ -1 +1,4 @@ # Judgeval Java SDK + +[![Maven Central](https://img.shields.io/maven-central/v/com.judgmentlabs/judgeval-java)](https://central.sonatype.com/artifact/com.judgmentlabs/judgeval-java) +[![javadoc](https://javadoc.io/badge2/com.judgmentlabs/judgeval-java/javadoc.svg)](https://javadoc.io/doc/com.judgmentlabs/judgeval-java) \ No newline at end of file diff --git a/examples/pom.xml b/examples/pom.xml new file mode 100644 index 0000000..ef2f5fc --- /dev/null +++ b/examples/pom.xml @@ -0,0 +1,78 @@ + + + 4.0.0 + + com.judgmentlabs + judgeval-parent + 0.0.0 + ../pom.xml + + examples + 0.0.1 + jar + Judgeval Examples + + + + io.opentelemetry + opentelemetry-bom + 1.55.0 + pom + import + + + + + + com.judgmentlabs + judgeval-java + + + io.opentelemetry + opentelemetry-api + + + + + com.judgmentlabs + judgeval-instrumentation-openai + + + com.openai + openai-java + 4.6.1 + + + io.opentelemetry + opentelemetry-api + 1.55.0 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.12.1 + + 21 + 21 + + + + org.codehaus.mojo + exec-maven-plugin + 3.4.1 + + + + java + + + + + + + + + diff --git a/examples/src/main/java/examples/simple_chat/SimpleChat.java b/examples/src/main/java/examples/simple_chat/SimpleChat.java new file mode 100644 index 0000000..c04d465 --- /dev/null +++ b/examples/src/main/java/examples/simple_chat/SimpleChat.java @@ -0,0 +1,38 @@ 
+package examples.simple_chat; + +import java.time.Duration; + +import com.judgmentlabs.judgeval.instrumentation.openai.OpenAITelemetry; +import com.judgmentlabs.judgeval.tracer.Tracer; +import com.openai.client.OpenAIClient; +import com.openai.client.okhttp.OpenAIOkHttpClient; +import com.openai.models.ChatModel; +import com.openai.models.chat.completions.ChatCompletionCreateParams; + +import io.opentelemetry.api.GlobalOpenTelemetry; + +public class SimpleChat { + public static void main(String[] args) { + var tracer = Tracer.createDefault("SimpleChat-Java"); + tracer.initialize(); + + OpenAIClient baseClient = OpenAIOkHttpClient.fromEnv(); + var otelClient = OpenAITelemetry.builder(GlobalOpenTelemetry.get()).build().wrap(baseClient); + + tracer.span("chat.session", () -> { + + var req = ChatCompletionCreateParams.builder() + .model(ChatModel.GPT_4O_MINI) + .maxCompletionTokens(512) + .addUserMessage("Say hi.") + .build(); + var res = otelClient.chat().completions().create(req); + System.out.println(String.valueOf(res)); + }); + + try { + Thread.sleep(Duration.ofSeconds(5).toMillis()); + } catch (InterruptedException ignored) { + } + } +} diff --git a/instrumentation/judgeval-instrumentation-openai/pom.xml b/instrumentation/judgeval-instrumentation-openai/pom.xml new file mode 100644 index 0000000..c73435c --- /dev/null +++ b/instrumentation/judgeval-instrumentation-openai/pom.xml @@ -0,0 +1,36 @@ + + + 4.0.0 + + com.judgmentlabs + judgeval-parent + 0.0.0 + ../../pom.xml + + judgeval-instrumentation-openai + ${judgeval-instrumentation-openai.version} + jar + Judgeval Instrumentation OpenAI + + + io.opentelemetry.instrumentation + opentelemetry-openai-java-1.1 + 2.21.0-alpha + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.12.1 + + 21 + 21 + + + + + + + diff --git a/instrumentation/judgeval-instrumentation-openai/src/main/java/com/judgmentlabs/judgeval/instrumentation/openai/OpenAITelemetry.java 
b/instrumentation/judgeval-instrumentation-openai/src/main/java/com/judgmentlabs/judgeval/instrumentation/openai/OpenAITelemetry.java new file mode 100644 index 0000000..e50daa8 --- /dev/null +++ b/instrumentation/judgeval-instrumentation-openai/src/main/java/com/judgmentlabs/judgeval/instrumentation/openai/OpenAITelemetry.java @@ -0,0 +1,17 @@ +package com.judgmentlabs.judgeval.instrumentation.openai; + +import io.opentelemetry.api.OpenTelemetry; + +public final class OpenAITelemetry { + private OpenAITelemetry() { + } + + public static io.opentelemetry.instrumentation.openai.v1_1.OpenAITelemetryBuilder builder( + OpenTelemetry openTelemetry) { + return io.opentelemetry.instrumentation.openai.v1_1.OpenAITelemetry.builder(openTelemetry); + } + + public static io.opentelemetry.instrumentation.openai.v1_1.OpenAITelemetry create(OpenTelemetry openTelemetry) { + return io.opentelemetry.instrumentation.openai.v1_1.OpenAITelemetry.create(openTelemetry); + } +} diff --git a/judgeval-java/pom.xml b/judgeval-java/pom.xml new file mode 100644 index 0000000..b88d71d --- /dev/null +++ b/judgeval-java/pom.xml @@ -0,0 +1,137 @@ + + + 4.0.0 + + com.judgmentlabs + judgeval-parent + 0.0.0 + ../pom.xml + + judgeval-java + ${judgeval-java.version} + jar + Judgeval Java + Java SDK for Judgeval + https://github.com/judgmentlabs/judgeval-java + + 2.17.0 + 2.43.0 + 3.3.1 + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + org.junit.jupiter + junit-jupiter + 5.10.1 + test + + + org.mockito + mockito-core + 5.8.0 + test + + + org.mockito + mockito-junit-jupiter + 5.8.0 + test + + + io.opentelemetry + opentelemetry-sdk-testing + 1.39.0 + test + + + com.github.tomakehurst + wiremock-jre8 + 2.35.0 + test + + + org.awaitility + awaitility + 4.2.0 + test + + + io.opentelemetry + opentelemetry-exporter-otlp + 1.39.0 + + + io.opentelemetry + opentelemetry-sdk-trace + 1.39.0 + + + io.opentelemetry + opentelemetry-sdk + 1.39.0 + + + com.google.code.gson + gson + 2.10.1 
+ + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.12.1 + + 21 + 21 + + + + org.apache.maven.plugins + maven-jar-plugin + 3.4.1 + + + + ${project.version} + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.3.0 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.6.3 + + + attach-javadocs + + jar + + + + + + + + + diff --git a/src/main/java/com/judgmentlabs/judgeval/Env.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/Env.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/Env.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/Env.java diff --git a/src/main/java/com/judgmentlabs/judgeval/Version.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/Version.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/Version.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/Version.java diff --git a/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java new file mode 100644 index 0000000..5e0bb09 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java @@ -0,0 +1,78 @@ +package com.judgmentlabs.judgeval.data; + +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +import com.judgmentlabs.judgeval.Env; +import 
com.judgmentlabs.judgeval.internal.api.models.ExampleEvaluationRun; +import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +import com.judgmentlabs.judgeval.scorers.BaseScorer; +import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; + +public class EvaluationRunBuilder { + private String projectName; + private String evalName; + private String model; + private com.judgmentlabs.judgeval.internal.api.models.Example example; + private String traceId; + private String spanId; + private final List scorers = new ArrayList<>(); + + public EvaluationRunBuilder projectName(String v) { + this.projectName = v; + return this; + } + + public EvaluationRunBuilder evalName(String v) { + this.evalName = v; + return this; + } + + public EvaluationRunBuilder model(String v) { + this.model = v; + return this; + } + + public EvaluationRunBuilder example(Example v) { + this.example = (com.judgmentlabs.judgeval.internal.api.models.Example) v; + return this; + } + + public EvaluationRunBuilder addScorer(BaseScorer v) { + this.scorers.add(v); + return this; + } + + public EvaluationRunBuilder trace(String traceId, String spanId) { + this.traceId = traceId; + this.spanId = spanId; + return this; + } + + public ExampleEvaluationRun build() { + ExampleEvaluationRun run = new ExampleEvaluationRun(); + run.setProjectName(projectName); + run.setEvalName(evalName); + run.setModel(model != null ? 
model : Env.JUDGMENT_DEFAULT_GPT_MODEL); + List judgment = new ArrayList<>(); + List custom = new ArrayList<>(); + for (BaseScorer s : scorers) { + if (s instanceof CustomScorer) + custom.add((com.judgmentlabs.judgeval.internal.api.models.BaseScorer) s); + else + judgment.add(s.getScorerConfig()); + } + run.setJudgmentScorers(judgment); + run.setCustomScorers(custom); + run.setExamples(List.of(example)); + run.setTraceId(traceId); + run.setTraceSpanId(spanId); + run.setId(UUID.randomUUID().toString()); + run.setCreatedAt(Instant.now().atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT)); + return run; + } +} diff --git a/src/main/java/com/judgmentlabs/judgeval/data/Example.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/data/Example.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java diff --git a/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java diff --git a/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java new file mode 100644 index 0000000..4e3dd33 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java @@ -0,0 +1,72 @@ +package 
com.judgmentlabs.judgeval.data; + +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +import com.judgmentlabs.judgeval.Env; +import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +import com.judgmentlabs.judgeval.internal.api.models.TraceEvaluationRun; +import com.judgmentlabs.judgeval.scorers.BaseScorer; +import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; + +public class TraceEvaluationRunBuilder { + private String projectName; + private String evalName; + private String model; + private String traceId; + private String spanId; + private final List scorers = new ArrayList<>(); + + public TraceEvaluationRunBuilder projectName(String v) { + this.projectName = v; + return this; + } + + public TraceEvaluationRunBuilder evalName(String v) { + this.evalName = v; + return this; + } + + public TraceEvaluationRunBuilder model(String v) { + this.model = v; + return this; + } + + public TraceEvaluationRunBuilder trace(String traceId, String spanId) { + this.traceId = traceId; + this.spanId = spanId; + return this; + } + + public TraceEvaluationRunBuilder addScorer(BaseScorer v) { + this.scorers.add(v); + return this; + } + + public TraceEvaluationRun build() { + TraceEvaluationRun run = new TraceEvaluationRun(); + run.setProjectName(projectName); + run.setEvalName(evalName); + run.setModel(model != null ? 
model : Env.JUDGMENT_DEFAULT_GPT_MODEL); + List judgment = new ArrayList<>(); + List custom = new ArrayList<>(); + for (BaseScorer s : scorers) { + if (s instanceof CustomScorer) + custom.add((com.judgmentlabs.judgeval.internal.api.models.BaseScorer) s); + else + judgment.add(s.getScorerConfig()); + } + run.setJudgmentScorers(judgment); + run.setCustomScorers(custom); + run.setTraceAndSpanIds(List.of(List.of(traceId, spanId))); + run.setIsOffline(false); + run.setIsBucketRun(false); + run.setId(UUID.randomUUID().toString()); + run.setCreatedAt(Instant.now().atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT)); + return run; + } +} diff --git a/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentAPIError.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentAPIError.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentAPIError.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentAPIError.java diff --git a/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentRuntimeError.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentRuntimeError.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentRuntimeError.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentRuntimeError.java diff --git a/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentTestError.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentTestError.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentTestError.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/exceptions/JudgmentTestError.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java 
similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/BaseScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/BaseScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/BaseScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/BaseScorer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/EvalResults.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/EvalResults.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/EvalResults.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/EvalResults.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/EvalResultsFetch.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/EvalResultsFetch.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/EvalResultsFetch.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/EvalResultsFetch.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/Example.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/Example.java similarity 
index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/Example.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/Example.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ExampleEvaluationRun.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ExampleEvaluationRun.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/ExampleEvaluationRun.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ExampleEvaluationRun.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/FetchPromptScorersRequest.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/FetchPromptScorersRequest.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/FetchPromptScorersRequest.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/FetchPromptScorersRequest.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/FetchPromptScorersResponse.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/FetchPromptScorersResponse.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/FetchPromptScorersResponse.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/FetchPromptScorersResponse.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/OtelTraceSpan.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/OtelTraceSpan.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/OtelTraceSpan.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/OtelTraceSpan.java diff --git 
a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/PromptScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/PromptScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/PromptScorer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ResolveProjectNameRequest.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ResolveProjectNameRequest.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/ResolveProjectNameRequest.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ResolveProjectNameRequest.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ResolveProjectNameResponse.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ResolveProjectNameResponse.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/ResolveProjectNameResponse.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ResolveProjectNameResponse.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/SavePromptScorerRequest.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/SavePromptScorerRequest.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/SavePromptScorerRequest.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/SavePromptScorerRequest.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/SavePromptScorerResponse.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/SavePromptScorerResponse.java similarity index 100% rename from 
src/main/java/com/judgmentlabs/judgeval/internal/api/models/SavePromptScorerResponse.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/SavePromptScorerResponse.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerConfig.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerConfig.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerConfig.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerConfig.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerData.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerData.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerData.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerData.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerExistsRequest.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerExistsRequest.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerExistsRequest.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerExistsRequest.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerExistsResponse.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerExistsResponse.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerExistsResponse.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScorerExistsResponse.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScoringResult.java 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScoringResult.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScoringResult.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/ScoringResult.java diff --git a/src/main/java/com/judgmentlabs/judgeval/internal/api/models/TraceEvaluationRun.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/TraceEvaluationRun.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/internal/api/models/TraceEvaluationRun.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models/TraceEvaluationRun.java diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java similarity index 97% rename from src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java index 6ad60e3..565b48b 100644 --- a/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java @@ -6,6 +6,7 @@ import java.util.Optional; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; @@ -33,6 +34,7 @@ public void setThreshold(double threshold) { super.setThreshold(threshold); } + @JsonProperty("score_type") public String getScoreType() { return scoreType.toString(); } @@ -65,6 +67,7 @@ public Boolean getStrictMode() { } @Override + @JsonIgnore public ScorerConfig getScorerConfig() { ScorerConfig cfg = new ScorerConfig(); cfg.setScoreType(getScoreType()); diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java diff --git 
a/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java new file mode 100644 index 0000000..268962a --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java @@ -0,0 +1,57 @@ +package com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer; + +import com.judgmentlabs.judgeval.data.APIScorerType; +import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +import com.judgmentlabs.judgeval.scorers.APIScorer; + +/** + * Server-hosted custom scorer representation for enqueue payloads. + * Instances serialize into ExampleEvaluationRun.custom_scorers with score_type + * "Custom", server_hosted=true, and optional class_name for server routing. + */ +public class CustomScorer extends APIScorer { + public CustomScorer() { + super(APIScorerType.CUSTOM); + } + + /** + * Creates a server-hosted custom scorer with the given name. + * + * @param name + * scorer identifier and default class_name + * @return configured CustomScorer + */ + public static CustomScorer get(String name) { + CustomScorer s = new CustomScorer(); + s.setName(name); + s.setClassName(name); + s.setServerHosted(true); + return s; + } + + /** + * Creates a server-hosted custom scorer with explicit class_name for routing. 
+ * + * @param name + * scorer identifier + * @param className + * server-side scorer class name + * @return configured CustomScorer + */ + public static CustomScorer get(String name, String className) { + CustomScorer s = get(name); + s.setClassName(className); + return s; + } + + @Override + /** + * Not used for server-hosted custom scorers. + * + * @deprecated never returns; always throws + * @return never returns; always throws + */ + public ScorerConfig getScorerConfig() { + throw new UnsupportedOperationException("CustomScorer does not use ScorerConfig"); + } +} diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/BasePromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/BasePromptScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/BasePromptScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/BasePromptScorer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java diff --git 
a/src/main/java/com/judgmentlabs/judgeval/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/BaseTracer.java similarity index 89% rename from src/main/java/com/judgmentlabs/judgeval/tracer/BaseTracer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/BaseTracer.java index ea08614..75fa866 100644 --- a/src/main/java/com/judgmentlabs/judgeval/tracer/BaseTracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/BaseTracer.java @@ -9,12 +9,14 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import com.judgmentlabs.judgeval.Env; +import com.judgmentlabs.judgeval.data.EvaluationRunBuilder; import com.judgmentlabs.judgeval.data.Example; +import com.judgmentlabs.judgeval.data.TraceEvaluationRunBuilder; import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.internal.api.models.ExampleEvaluationRun; import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameRequest; import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameResponse; import com.judgmentlabs.judgeval.internal.api.models.TraceEvaluationRun; -import com.judgmentlabs.judgeval.internal.api.models.ExampleEvaluationRun; import com.judgmentlabs.judgeval.scorers.BaseScorer; import com.judgmentlabs.judgeval.tracer.exporters.JudgmentSpanExporter; import com.judgmentlabs.judgeval.tracer.exporters.NoOpSpanExporter; @@ -28,11 +30,6 @@ import io.opentelemetry.context.Scope; import io.opentelemetry.sdk.trace.export.SpanExporter; -import java.time.Instant; -import java.time.ZoneOffset; -import java.time.format.DateTimeFormatter; -import java.util.UUID; - public abstract class BaseTracer { public static final String TRACER_NAME = "judgeval"; @@ -453,8 +450,27 @@ public Tracer getTracer() { } /** - * Creates and returns a new span with the given name. The span must be ended - * manually by calling {@link Span#end()}. 
+ * Gets the tracer configuration. + * + * @return the TracerConfiguration instance + */ + public TracerConfiguration getConfiguration() { + return configuration; + } + + /** + * Gets the resolved project ID, if available. + * + * @return an Optional containing the project ID, or empty if not resolved + */ + public Optional getProjectId() { + return projectId; + } + + /** + * Creates and returns a new span with the given name. The span must be entered + * with a try-with-resources block or manually ended by calling + * {@link Span#end()}. * * @param spanName * the name of the span @@ -505,59 +521,35 @@ private String getModelName(String model) { private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example example, String model, String traceId, String spanId) { String runId = generateRunId("async_evaluate_", spanId); - String modelName = getModelName(model); - - ExampleEvaluationRun exampleEvaluationRun = new ExampleEvaluationRun(); - exampleEvaluationRun.setProjectName(configuration.projectName()); - exampleEvaluationRun.setEvalName(runId); - exampleEvaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); - exampleEvaluationRun.setModel(modelName); - com.judgmentlabs.judgeval.internal.api.models.Example internalExample = (com.judgmentlabs.judgeval.internal.api.models.Example) example; - exampleEvaluationRun.setExamples(List.of(internalExample)); - exampleEvaluationRun.setTraceId(traceId); - exampleEvaluationRun.setTraceSpanId(spanId); - exampleEvaluationRun.setCustomScorers(new java.util.ArrayList<>()); - exampleEvaluationRun.setId(UUID.randomUUID() - .toString()); - exampleEvaluationRun.setCreatedAt(Instant.now() - .atOffset(ZoneOffset.UTC) - .format(DateTimeFormatter.ISO_INSTANT)); - return exampleEvaluationRun; + return new EvaluationRunBuilder() + .projectName(configuration.projectName()) + .evalName(runId) + .model(model) + .example(example) + .trace(traceId, spanId) + .addScorer(scorer) + .build(); } private TraceEvaluationRun 
createTraceEvaluationRun(BaseScorer scorer, String model, String traceId, String spanId) { String evalName = generateRunId("async_trace_evaluate_", spanId); - String modelName = getModelName(model); - - TraceEvaluationRun traceEvaluationRun = new TraceEvaluationRun(); - traceEvaluationRun.setProjectName(configuration.projectName()); - traceEvaluationRun.setEvalName(evalName); - traceEvaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); - traceEvaluationRun.setModel(modelName); - traceEvaluationRun.setTraceAndSpanIds(convertTraceAndSpanIds(List.of(List.of(traceId, spanId)))); - traceEvaluationRun.setIsOffline(false); - traceEvaluationRun.setIsBucketRun(false); - traceEvaluationRun.setCustomScorers(new java.util.ArrayList<>()); - traceEvaluationRun.setId(UUID.randomUUID() - .toString()); - traceEvaluationRun.setCreatedAt(Instant.now() - .atOffset(ZoneOffset.UTC) - .format(DateTimeFormatter.ISO_INSTANT)); - - return traceEvaluationRun; + return new TraceEvaluationRunBuilder() + .projectName(configuration.projectName()) + .evalName(evalName) + .model(model) + .trace(traceId, spanId) + .addScorer(scorer) + .build(); } private static List> convertTraceAndSpanIds(List> traceAndSpanIds) { - if (traceAndSpanIds == null || traceAndSpanIds.isEmpty()) { + if (traceAndSpanIds == null || traceAndSpanIds.isEmpty()) throw new IllegalArgumentException("Trace and span IDs are required for trace evaluations."); - } - List> converted = new java.util.ArrayList<>(); for (List pair : traceAndSpanIds) { - if (pair == null || pair.size() != 2) { + if (pair == null || pair.size() != 2) throw new IllegalArgumentException("Each trace and span ID pair must contain exactly 2 elements."); - } converted.add(List.of(pair.get(0), pair.get(1))); } return converted; diff --git a/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java similarity index 100% rename from 
src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/tracer/JudgevalTraceKeys.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/JudgevalTraceKeys.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/tracer/JudgevalTraceKeys.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/JudgevalTraceKeys.java diff --git a/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java diff --git a/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java diff --git a/src/main/java/com/judgmentlabs/judgeval/tracer/exporters/JudgmentSpanExporter.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/exporters/JudgmentSpanExporter.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/tracer/exporters/JudgmentSpanExporter.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/exporters/JudgmentSpanExporter.java diff --git a/src/main/java/com/judgmentlabs/judgeval/tracer/exporters/NoOpSpanExporter.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/exporters/NoOpSpanExporter.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/tracer/exporters/NoOpSpanExporter.java rename to 
judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/exporters/NoOpSpanExporter.java diff --git a/src/main/java/com/judgmentlabs/judgeval/utils/Logger.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/utils/Logger.java similarity index 100% rename from src/main/java/com/judgmentlabs/judgeval/utils/Logger.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/utils/Logger.java diff --git a/pom.xml b/pom.xml index 6de0db4..6855428 100644 --- a/pom.xml +++ b/pom.xml @@ -1,210 +1,43 @@ - + 4.0.0 com.judgmentlabs - judgeval-java - 0.2.2 - jar - Judgeval Java - Java SDK for Judgeval - https://github.com/judgmentlabs/judgeval-java - - - - Apache License, Version 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt - repo - - - - - - judgmentlabs - Judgment Labs - contact@judgmentlabs.com - Judgment Labs - https://judgmentlabs.ai - - - - - scm:git:git://github.com/judgmentlabs/judgeval-java.git - scm:git:ssh://github.com:judgmentlabs/judgeval-java.git - https://github.com/judgmentlabs/judgeval-java/tree/main - - + judgeval-parent + 0.0.0 + pom + Judgeval Parent + + judgeval-java + instrumentation/judgeval-instrumentation-openai + examples + 21 21 UTF-8 - 2.17.0 - 2.43.0 - 3.3.1 + 0.2.3 + 0.0.1 - - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - org.junit.jupiter - junit-jupiter - 5.10.1 - test - - - org.mockito - mockito-core - 5.8.0 - test - - - org.mockito - mockito-junit-jupiter - 5.8.0 - test - - - io.opentelemetry - opentelemetry-sdk-testing - 1.39.0 - test - - - com.github.tomakehurst - wiremock-jre8 - 2.35.0 - test - - - org.awaitility - awaitility - 4.2.0 - test - - - io.opentelemetry - opentelemetry-exporter-otlp - 1.39.0 - - - io.opentelemetry - opentelemetry-sdk-trace - 1.39.0 - - - io.opentelemetry - opentelemetry-sdk - 1.39.0 - - - com.google.code.gson - gson - 2.10.1 - - - + + + + com.judgmentlabs + judgeval-java + 
${judgeval-java.version} + + + com.judgmentlabs + judgeval-instrumentation-openai + ${judgeval-instrumentation-openai.version} + + + - - org.sonatype.central - central-publishing-maven-plugin - 0.8.0 - true - - central - true - published - 3600 - 5 - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.12.1 - - 21 - 21 - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.4.1 - - - - ${project.version} - - - - - - - org.apache.maven.plugins - maven-source-plugin - 3.3.0 - - - attach-sources - - jar-no-fork - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.6.3 - - - attach-javadocs - - jar - - - - - - - org.apache.maven.plugins - maven-gpg-plugin - 3.1.0 - - - sign-artifacts - verify - - sign - - - - - --pinentry-mode - loopback - - - - - com.diffplug.spotless spotless-maven-plugin - ${spotless.version} + 2.43.0 @@ -218,25 +51,6 @@ - - - org.apache.maven.plugins - maven-checkstyle-plugin - ${checkstyle.version} - - checkstyle.xml - true - false - - - - - - central - Maven Central Repository - https://central.sonatype.com/api/v1/publisher/deployments - - - + \ No newline at end of file diff --git a/scripts/generate_client.py b/scripts/generate_client.py index 54d9bc1..2a3cb68 100755 --- a/scripts/generate_client.py +++ b/scripts/generate_client.py @@ -173,7 +173,10 @@ def get_java_type(schema: Dict[str, Any]) -> str: if len(non_null_types) == 1: return list(non_null_types)[0] else: - print(f"Union type with multiple non-null types: {non_null_types}", file=sys.stderr) + print( + f"Union type with multiple non-null types: {non_null_types}", + file=sys.stderr, + ) return "Object" schema_type = schema.get("type", "object") @@ -195,7 +198,7 @@ def get_java_type(schema: Dict[str, Any]) -> str: def generate_model_class(className: str, schema: Dict[str, Any]) -> str: required_fields = set(schema.get("required", [])) has_required = bool(required_fields) - + lines = [ "package com.judgmentlabs.judgeval.internal.api.models;", "", @@ -207,7 +210,7 @@ 
def generate_model_class(className: str, schema: Dict[str, Any]) -> str: "import java.util.Map;", "import java.util.Objects;", ] - + lines.extend(["", f"public class {className} {{"]) fields = [] @@ -224,9 +227,9 @@ def generate_model_class(className: str, schema: Dict[str, Any]) -> str: field_lines = [ f' @JsonProperty("{field_name}")', - f" private {java_type} {camel_case_name};" + f" private {java_type} {camel_case_name};", ] - + fields.extend(field_lines) getters.extend( @@ -451,9 +454,9 @@ def generate_client_class( " private final String organizationId;", "", f" public {className}(String baseUrl, String apiKey, String organizationId) {{", - " this.baseUrl = Objects.requireNonNull(baseUrl, \"Base URL cannot be null\");", - " this.apiKey = Objects.requireNonNull(apiKey, \"API key cannot be null\");", - " this.organizationId = Objects.requireNonNull(organizationId, \"Organization ID cannot be null\");", + ' this.baseUrl = Objects.requireNonNull(baseUrl, "Base URL cannot be null");', + ' this.apiKey = Objects.requireNonNull(apiKey, "API key cannot be null");', + ' this.organizationId = Objects.requireNonNull(organizationId, "Organization ID cannot be null");', " this.client = HttpClient.newBuilder()", " .version(HttpClient.Version.HTTP_1_1)", " .build();", @@ -490,14 +493,22 @@ def generate_client_class( ] throws_clause = "" if is_async else " throws IOException" - lines.append(f" private T handleResponse(HttpResponse response){throws_clause} {{") + lines.append( + f" private T handleResponse(HttpResponse response){throws_clause} {{" + ) lines.append(" if (response.statusCode() >= 400) {") - lines.append(f' throw new RuntimeException("HTTP Error: " + response.statusCode() + " - " + response.body());') + lines.append( + f' throw new RuntimeException("HTTP Error: " + response.statusCode() + " - " + response.body());' + ) lines.append(" }") lines.append(" try {") - lines.append(" return mapper.readValue(response.body(), new TypeReference() {});") + lines.append( + " 
return mapper.readValue(response.body(), new TypeReference() {});" + ) lines.append(" } catch (Exception e) {") - lines.append(' throw new RuntimeException("Failed to parse response", e);') + lines.append( + ' throw new RuntimeException("Failed to parse response", e);' + ) lines.append(" }") lines.append(" }") lines.append("") @@ -534,7 +545,9 @@ def generate_api_files(spec: Dict[str, Any]) -> None: used_schemas = find_used_schemas(spec) schemas = spec.get("components", {}).get("schemas", {}) - models_dir = "src/main/java/com/judgmentlabs/judgeval/internal/api/models" + models_dir = ( + "judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/models" + ) if os.path.exists(models_dir): print(f"Clearing existing models directory: {models_dir}", file=sys.stderr) shutil.rmtree(models_dir) @@ -590,7 +603,7 @@ def generate_api_files(spec: Dict[str, Any]) -> None: } methods.append(method_info) - api_dir = "src/main/java/com/judgmentlabs/judgeval/internal/api" + api_dir = "judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api" os.makedirs(api_dir, exist_ok=True) for is_async, class_name in [