open-telemetry · jack-berg · Nov 19, 2025 · Sep 17, 2025 · Sep 23, 2025 · Sep 23, 2025
@@ -8,6 +8,46 @@ plugins {
 description = "OpenTelemetry Exporter Common"
 otelJava.moduleName.set("io.opentelemetry.exporter.internal")
 
+java {
+  sourceSets {
+    create("java9") {
+      java {
+        srcDir("src/main/java9")
+      }
+      // Make java9 source set depend on main source set
+      // since VarHandleStringEncoder implements StringEncoder from the main source set
+      compileClasspath += sourceSets.main.get().output + sourceSets.main.get().compileClasspath
+    }
+  }
+}
+
+// Configure java9 compilation to see main source classes
+sourceSets.named("java9") {
+  compileClasspath += sourceSets.main.get().output
+}
+
+tasks.named<JavaCompile>("compileJava9Java") {
+  options.release.set(9)
+}
+
+tasks.named<Jar>("jar") {
+  manifest {
+    attributes["Multi-Release"] = "true"
+  }
+  from(sourceSets.named("java9").get().output) {
+    into("META-INF/versions/9")
+  }
+}
+
+// Configure test to include java9 classes when running on Java 9+
+// so that StringEncoderHolder.createUnsafeEncoder() can instantiate the Java 9 version
+val javaVersion = JavaVersion.current()
+if (javaVersion >= JavaVersion.VERSION_1_9) {
+  sourceSets.named("test") {
+    runtimeClasspath += sourceSets.named("java9").get().output
+  }
+}
+
 val versions: Map<String, String> by project
 dependencies {
   api(project(":api:all"))
@@ -79,6 +119,15 @@ tasks {
   check {
     dependsOn(testing.suites)
   }
+
+  withType<Test> {
+    // Allow VarHandle access to String internals
+    // generally users won't do this and so won't get the VarHandle implementation
+    // but the Java agent is able to automatically open these modules
+    // (see ModuleOpener.java in that repository)
+    jvmArgs("--add-opens=java.base/java.lang=ALL-UNNAMED")
+    jvmArgs("-XX:+IgnoreUnrecognizedVMOptions") // needed for Java 8
+  }
 }
 
 afterEvaluate {

@@ -0,0 +1,93 @@
+/*
+ * Copyright The OpenTelemetry Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package io.opentelemetry.exporter.internal.marshal;
+
+import java.io.IOException;
+
+/**
+ * This class contains shared logic for UTF-8 encoding operations while allowing subclasses to
+ * implement different mechanisms for accessing String internal byte arrays (e.g., Unsafe vs
+ * VarHandle).
+ *
+ * <p>This class is internal and is hence not for public use. Its APIs are unstable and can change
+ * at any time.
+ */
+abstract class AbstractStringEncoder implements StringEncoder {
+
+  private final FallbackStringEncoder fallback = new FallbackStringEncoder();
+
+  @Override
+  public final void writeUtf8(CodedOutputStream output, String string, int utf8Length)
+      throws IOException {
+    // if the length of the latin1 string and the utf8 output are the same then the string must be
+    // composed of only 7bit characters and can be directly copied to the output
+    if (string.length() == utf8Length && isLatin1(string)) {
+      byte[] bytes = getStringBytes(string);
+      output.write(bytes, 0, bytes.length);
+    } else {
+      fallback.writeUtf8(output, string, utf8Length);
+    }
+  }
+
+  @Override
+  public final int getUtf8Size(String string) {
+    if (isLatin1(string)) {
+      byte[] bytes = getStringBytes(string);
+      // latin1 bytes with negative value (most significant bit set) are encoded as 2 bytes in utf8
+      return string.length() + countNegative(bytes);
+    }
+
+    return fallback.getUtf8Size(string);
+  }
+
+  protected abstract byte[] getStringBytes(String string);
+
+  protected abstract boolean isLatin1(String string);
+
+  protected abstract long getLong(byte[] bytes, int offset);
+
+  // Inner loop can process at most 8 * 255 bytes without overflowing counter. To process more bytes
+  // inner loop has to be run multiple times.
+  private static final int MAX_INNER_LOOP_SIZE = 8 * 255;
+  // mask that selects only the most significant bit in every byte of the long
+  private static final long MOST_SIGNIFICANT_BIT_MASK = 0x8080808080808080L;
+
+  /** Returns the count of bytes with negative value. */
+  private int countNegative(byte[] bytes) {
+    int count = 0;
+    int offset = 0;
+    // We are processing one long (8 bytes) at a time. In the inner loop we are keeping counts in a
+    // long where each byte in the long is a separate counter. Due to this the inner loop can
+    // process a maximum of 8*255 bytes at a time without overflow.
+    for (int i = 1; i <= bytes.length / MAX_INNER_LOOP_SIZE + 1; i++) {
+      long tmp = 0; // each byte in this long is a separate counter
+      int limit = Math.min(i * MAX_INNER_LOOP_SIZE, bytes.length & ~7);
+      for (; offset < limit; offset += 8) {
+        long value = getLong(bytes, offset);
+        // Mask the value keeping only the most significant bit in each byte and then shift this bit
+        // to the position of the least significant bit in each byte. If the input byte was not
+        // negative then after this transformation it will be zero, if it was negative then it will
+        // be one.
+        tmp += (value & MOST_SIGNIFICANT_BIT_MASK) >>> 7;
+      }
+      // sum up counts
+      if (tmp != 0) {
+        for (int j = 0; j < 8; j++) {
+          count += (int) (tmp & 0xff);
+          tmp = tmp >>> 8;
+        }
+      }
+    }
+
+    // Handle remaining bytes. Previous loop processes 8 bytes a time, if the input size is not
+    // divisible with 8 the remaining bytes are handled here.
+    for (int i = offset; i < bytes.length; i++) {
+      // same as if (bytes[i] < 0) count++;
+      count += bytes[i] >>> 31;
+    }
+    return count;
+  }
+}
@@ -0,0 +1,140 @@
+/*
+ * Copyright The OpenTelemetry Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package io.opentelemetry.exporter.internal.marshal;
+
+import java.io.IOException;
+
+/**
+ * Fallback StringEncoder implementation using standard Java string operations.
+ *
+ * <p>This implementation works on all Java versions and provides correct UTF-8 handling.
+ *
+ * <p>This class is internal and is hence not for public use. Its APIs are unstable and can change
+ * at any time.
+ */
+final class FallbackStringEncoder implements StringEncoder {
+
+  FallbackStringEncoder() {}
+
+  @Override
+  public int getUtf8Size(String string) {
+    return encodedUtf8Length(string);
+  }
+
+  @Override
+  public void writeUtf8(CodedOutputStream output, String string, int utf8Length)
+      throws IOException {
+    encodeUtf8(output, string);
+  }
+
+  // adapted from
+  // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L217
+  private static int encodedUtf8Length(String string) {
+    // Warning to maintainers: this implementation is highly optimized.
+    int utf16Length = string.length();
+    int utf8Length = utf16Length;
+    int i = 0;
+
+    // This loop optimizes for pure ASCII.
+    while (i < utf16Length && string.charAt(i) < 0x80) {
+      i++;
+    }
+
+    // This loop optimizes for chars less than 0x800.
+    for (; i < utf16Length; i++) {
+      char c = string.charAt(i);
+      if (c < 0x800) {
+        utf8Length += ((0x7f - c) >>> 31); // branch free!
+      } else {
+        utf8Length += encodedUtf8LengthGeneral(string, i);
+        break;
+      }
+    }
+
+    if (utf8Length < utf16Length) {
+      // Necessary and sufficient condition for overflow because of maximum 3x expansion
+      throw new IllegalArgumentException(
+          "UTF-8 length does not fit in int: " + (utf8Length + (1L << 32)));
+    }
+
+    return utf8Length;
+  }
+
+  // adapted from
+  // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L247
+  private static int encodedUtf8LengthGeneral(String string, int start) {
+    int utf16Length = string.length();
+    int utf8Length = 0;
+    for (int i = start; i < utf16Length; i++) {
+      char c = string.charAt(i);
+      if (c < 0x800) {
+        utf8Length += (0x7f - c) >>> 31; // branch free!
+      } else {
+        utf8Length += 2;
+        if (Character.isSurrogate(c)) {
+          // Check that we have a well-formed surrogate pair.
+          if (Character.codePointAt(string, i) != c) {
+            i++;
+          } else {
+            // invalid sequence
+            // At this point we have accumulated 3 byes of length (2 in this method and 1 in caller)
+            // for current character, reduce the length to 1 bytes as we are going to encode the
+            // invalid character as ?
+            utf8Length -= 2;
+          }
+        }
+      }
+    }
+
+    return utf8Length;
+  }
+
+  // encode utf8 the same way as length is computed in encodedUtf8Length
+  // adapted from
+  // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L1016
+  private static void encodeUtf8(CodedOutputStream output, String in) throws IOException {
+    int utf16Length = in.length();
+    int i = 0;
+    // Designed to take advantage of
+    // https://wiki.openjdk.java.net/display/HotSpotInternals/RangeCheckElimination
+    for (char c; i < utf16Length && (c = in.charAt(i)) < 0x80; i++) {
+      output.write((byte) c);
+    }
+    if (i == utf16Length) {
+      return;
+    }
+
+    for (char c; i < utf16Length; i++) {
+      c = in.charAt(i);
+      if (c < 0x80) {
+        // 1 byte, 7 bits
+        output.write((byte) c);
+      } else if (c < 0x800) { // 11 bits, two UTF-8 bytes
+        output.write((byte) ((0xF << 6) | (c >>> 6)));
+        output.write((byte) (0x80 | (0x3F & c)));
+      } else if (!Character.isSurrogate(c)) {
+        // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+        output.write((byte) ((0xF << 5) | (c >>> 12)));
+        output.write((byte) (0x80 | (0x3F & (c >>> 6))));
+        output.write((byte) (0x80 | (0x3F & c)));
+      } else {
+        // Minimum code point represented by a surrogate pair is 0x10000, 17 bits,
+        // four UTF-8 bytes
+        int codePoint = Character.codePointAt(in, i);
+        if (codePoint != c) {
+          output.write((byte) ((0xF << 4) | (codePoint >>> 18)));
+          output.write((byte) (0x80 | (0x3F & (codePoint >>> 12))));
+          output.write((byte) (0x80 | (0x3F & (codePoint >>> 6))));
+          output.write((byte) (0x80 | (0x3F & codePoint)));
+          i++;
+        } else {
+          // invalid sequence
+          output.write((byte) '?');
+        }
+      }
+    }
+  }
+}
@@ -27,7 +27,7 @@
  */
 public final class MarshalerContext {
   private final boolean marshalStringNoAllocation;
-  private final boolean marshalStringUnsafe;
+  private final StringEncoder stringEncoder;
 
   private int[] sizes = new int[16];
   private int sizeReadIndex;
@@ -37,20 +37,25 @@ public final class MarshalerContext {
   private int dataWriteIndex;
 
   public MarshalerContext() {
-    this(/* marshalStringNoAllocation= */ true, /* marshalStringUnsafe= */ true);
+    this(/* marshalStringNoAllocation= */ true);
   }
 
-  public MarshalerContext(boolean marshalStringNoAllocation, boolean marshalStringUnsafe) {
+  public MarshalerContext(boolean marshalStringNoAllocation) {
     this.marshalStringNoAllocation = marshalStringNoAllocation;
-    this.marshalStringUnsafe = marshalStringUnsafe;
+    this.stringEncoder = StringEncoder.getInstance();
+  }
+
+  public MarshalerContext(boolean marshalStringNoAllocation, StringEncoder stringEncoder) {
+    this.marshalStringNoAllocation = marshalStringNoAllocation;
+    this.stringEncoder = stringEncoder;
   }
 
   public boolean marshalStringNoAllocation() {
     return marshalStringNoAllocation;
   }
 
-  public boolean marshalStringUnsafe() {
-    return marshalStringUnsafe;
+  public StringEncoder getStringEncoder() {
+    return stringEncoder;
   }
 
   public void addSize(int size) {

@@ -160,7 +160,7 @@ public void writeString(
     output.writeUInt32NoTag(field.getTag());
     output.writeUInt32NoTag(utf8Length);
 
-    StatelessMarshalerUtil.writeUtf8(output, string, utf8Length, context);
+    context.getStringEncoder().writeUtf8(output, string, utf8Length);
   }
 
   @Override