Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions exporters/common/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,46 @@ plugins {
description = "OpenTelemetry Exporter Common"
otelJava.moduleName.set("io.opentelemetry.exporter.internal")

// Multi-release jar support: sources under src/main/java9 are compiled with --release 9 and
// packaged under META-INF/versions/9 of the main jar.
java {
  sourceSets {
    create("java9") {
      java {
        srcDir("src/main/java9")
      }
      // The java9 source set compiles against the main source set (both its classes and its
      // compile dependencies) since VarHandleStringEncoder implements StringEncoder from the
      // main source set. This single assignment is sufficient; the main output does not need
      // to be added to the compile classpath a second time.
      compileClasspath += sourceSets.main.get().output + sourceSets.main.get().compileClasspath
    }
  }
}

tasks.named<JavaCompile>("compileJava9Java") {
  options.release.set(9)
}

tasks.named<Jar>("jar") {
  manifest {
    // Marks the jar as multi-release so that the versioned directory below is honored
    attributes["Multi-Release"] = "true"
  }
  from(sourceSets.named("java9").get().output) {
    into("META-INF/versions/9")
  }
}

// Configure test to include java9 classes when running on Java 9+
// so that StringEncoderHolder.createUnsafeEncoder() can instantiate the Java 9 version
val javaVersion = JavaVersion.current()
if (javaVersion.isJava9Compatible) {
  sourceSets.named("test") {
    runtimeClasspath += sourceSets.named("java9").get().output
  }
}

val versions: Map<String, String> by project
dependencies {
api(project(":api:all"))
Expand Down Expand Up @@ -79,6 +119,15 @@ tasks {
check {
dependsOn(testing.suites)
}

withType<Test> {
  jvmArgs(
    // Opens java.lang so that the VarHandle implementation can access String internals.
    // Users generally won't pass this flag and so won't get the VarHandle implementation,
    // but the Java agent is able to automatically open these modules
    // (see ModuleOpener.java in that repository).
    "--add-opens=java.base/java.lang=ALL-UNNAMED",
    // needed for Java 8
    "-XX:+IgnoreUnrecognizedVMOptions",
  )
}
}

afterEvaluate {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.exporter.internal.marshal;

import java.io.IOException;

/**
* This class contains shared logic for UTF-8 encoding operations while allowing subclasses to
* implement different mechanisms for accessing String internal byte arrays (e.g., Unsafe vs
* VarHandle).
*
* <p>This class is internal and is hence not for public use. Its APIs are unstable and can change
* at any time.
*/
abstract class AbstractStringEncoder implements StringEncoder {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the code in this class is just moved (cut-and-paste) from StatelessMarshalerUtil


private final FallbackStringEncoder fallback = new FallbackStringEncoder();

/**
 * Writes {@code string} to {@code output} as UTF-8.
 *
 * <p>When the string is latin1 and its UTF-8 length equals its character count, every character
 * is 7-bit, so the backing bytes are copied to the output unchanged. Every other string is
 * delegated to the fallback encoder.
 */
@Override
public final void writeUtf8(CodedOutputStream output, String string, int utf8Length)
    throws IOException {
  boolean sevenBitOnly = string.length() == utf8Length && isLatin1(string);
  if (!sevenBitOnly) {
    fallback.writeUtf8(output, string, utf8Length);
    return;
  }

  // latin1 bytes of a pure 7-bit string are already valid UTF-8
  byte[] raw = getStringBytes(string);
  output.write(raw, 0, raw.length);
}

/**
 * Returns the number of bytes needed to encode {@code string} as UTF-8.
 *
 * <p>For latin1 strings the size is computed from the backing byte array; all other strings are
 * delegated to the fallback encoder.
 */
@Override
public final int getUtf8Size(String string) {
  if (!isLatin1(string)) {
    return fallback.getUtf8Size(string);
  }

  byte[] latin1 = getStringBytes(string);
  // Every latin1 byte with the most significant bit set (negative value) takes 2 bytes in
  // UTF-8, all other bytes take 1.
  int twoByteChars = countNegative(latin1);
  return string.length() + twoByteChars;
}

/**
 * Returns the byte array backing {@code string}. Within this class it is only called after
 * {@link #isLatin1(String)} returned {@code true} for the same string, and the returned array is
 * only read.
 */
protected abstract byte[] getStringBytes(String string);

/**
 * Returns whether {@code string} is stored in latin1 form, i.e. whether the result of {@link
 * #getStringBytes(String)} can be interpreted as one byte per character.
 */
protected abstract boolean isLatin1(String string);

/**
 * Reads eight consecutive bytes of {@code bytes}, starting at index {@code offset}, as a single
 * long. The only caller in this class treats each byte of the result independently.
 * NOTE(review): presumably {@code offset} is an array index rather than a raw memory offset -
 * confirm against the implementations.
 */
protected abstract long getLong(byte[] bytes, int offset);

// Inner loop can process at most 8 * 255 bytes without overflowing counter. To process more bytes
// inner loop has to be run multiple times.
private static final int MAX_INNER_LOOP_SIZE = 8 * 255;
// mask that selects only the most significant bit in every byte of the long
private static final long MOST_SIGNIFICANT_BIT_MASK = 0x8080808080808080L;

/**
 * Returns the count of bytes with negative value, i.e. bytes whose most significant bit is set.
 *
 * <p>The part of the array whose length is a multiple of eight is scanned eight bytes at a time
 * using {@link #getLong(byte[], int)}; the remaining bytes (fewer than eight) are checked one by
 * one.
 *
 * @param bytes the array to scan
 * @return the number of elements of {@code bytes} that are less than zero
 */
private int countNegative(byte[] bytes) {
int count = 0;
int offset = 0;
// We are processing one long (8 bytes) at a time. In the inner loop we are keeping counts in a
// long where each byte in the long is a separate counter. Due to this the inner loop can
// process a maximum of 8*255 bytes at a time without overflow.
// The outer loop therefore walks the array in chunks of at most MAX_INNER_LOOP_SIZE bytes.
for (int i = 1; i <= bytes.length / MAX_INNER_LOOP_SIZE + 1; i++) {
long tmp = 0; // each byte in this long is a separate counter
// End of the current chunk, capped at the array length rounded down to a multiple of 8 so
// that a full long can always be read.
int limit = Math.min(i * MAX_INNER_LOOP_SIZE, bytes.length & ~7);
for (; offset < limit; offset += 8) {
long value = getLong(bytes, offset);
// Mask the value keeping only the most significant bit in each byte and then shift this bit
// to the position of the least significant bit in each byte. If the input byte was not
// negative then after this transformation it will be zero, if it was negative then it will
// be one.
tmp += (value & MOST_SIGNIFICANT_BIT_MASK) >>> 7;
}
// sum up counts
// (add each of the eight per-byte counters held in tmp to the total)
if (tmp != 0) {
for (int j = 0; j < 8; j++) {
count += (int) (tmp & 0xff);
tmp = tmp >>> 8;
}
}
}

// Handle remaining bytes. Previous loop processes 8 bytes a time, if the input size is not
// divisible with 8 the remaining bytes are handled here.
for (int i = offset; i < bytes.length; i++) {
// same as if (bytes[i] < 0) count++;
// The byte is sign-extended to int before the shift, so the unsigned shift by 31 yields 1 for
// a negative byte and 0 otherwise.
count += bytes[i] >>> 31;
}
return count;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/

package io.opentelemetry.exporter.internal.marshal;

import java.io.IOException;

/**
* Fallback StringEncoder implementation using standard Java string operations.
*
* <p>This implementation works on all Java versions and provides correct UTF-8 handling.
*
* <p>This class is internal and is hence not for public use. Its APIs are unstable and can change
* at any time.
*/
final class FallbackStringEncoder implements StringEncoder {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the code in this class is just moved (cut-and-paste) from StatelessMarshalerUtil


FallbackStringEncoder() {}

/**
 * Returns the number of bytes needed to encode {@code string} as UTF-8, computed by scanning
 * the string's chars.
 *
 * @throws IllegalArgumentException if the encoded length does not fit in an int
 */
@Override
public int getUtf8Size(String string) {
return encodedUtf8Length(string);
}

/**
 * Writes {@code string} to {@code output} as UTF-8, one byte at a time.
 *
 * <p>The {@code utf8Length} argument is not used by this implementation.
 */
@Override
public void writeUtf8(CodedOutputStream output, String string, int utf8Length)
throws IOException {
encodeUtf8(output, string);
}

// adapted from
// https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L217
/**
 * Computes the number of bytes {@code string} occupies when encoded as UTF-8. Unpaired
 * surrogates are counted as a single byte.
 *
 * @throws IllegalArgumentException if the encoded length does not fit in an int
 */
private static int encodedUtf8Length(String string) {
  // Warning to maintainers: this implementation is highly optimized.
  final int charCount = string.length();
  // Start with one byte per char; only the extra bytes are added below.
  int byteCount = charCount;
  int pos = 0;

  // Skip over the leading run of pure ASCII chars, they need no extra bytes.
  while (pos < charCount && string.charAt(pos) < 0x80) {
    pos++;
  }

  // Handle chars below 0x800 inline and hand everything from the first larger char onwards to
  // the general routine.
  while (pos < charCount) {
    char ch = string.charAt(pos);
    if (ch >= 0x800) {
      byteCount += encodedUtf8LengthGeneral(string, pos);
      break;
    }
    byteCount += ((0x7f - ch) >>> 31); // branch free: 1 when ch >= 0x80, otherwise 0
    pos++;
  }

  // Necessary and sufficient condition for overflow because of maximum 3x expansion
  if (byteCount < charCount) {
    throw new IllegalArgumentException(
        "UTF-8 length does not fit in int: " + (byteCount + (1L << 32)));
  }

  return byteCount;
}

// adapted from
// https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L247
/**
 * Returns the number of UTF-8 bytes needed for the chars of {@code string} from index {@code
 * start} to the end, in addition to the one byte per UTF-16 char that the caller has already
 * accounted for.
 *
 * @param string the string being measured
 * @param start index of the first char to examine
 * @return the number of extra bytes on top of one byte per char
 */
private static int encodedUtf8LengthGeneral(String string, int start) {
int utf16Length = string.length();
int utf8Length = 0;
for (int i = start; i < utf16Length; i++) {
char c = string.charAt(i);
if (c < 0x800) {
utf8Length += (0x7f - c) >>> 31; // branch free!
} else {
// 3 byte encoding: 2 extra bytes on top of the one counted by the caller
utf8Length += 2;
if (Character.isSurrogate(c)) {
// Check that we have a well-formed surrogate pair.
if (Character.codePointAt(string, i) != c) {
// Valid pair: 4 bytes in total, 2 already counted by the caller (one per char) plus the 2
// added above. Skip the second char of the pair.
i++;
} else {
// invalid sequence
// At this point we have accumulated 3 bytes of length (2 in this method and 1 in caller)
// for current character, reduce the length to 1 byte as we are going to encode the
// invalid character as ?
utf8Length -= 2;
}
}
}
}

return utf8Length;
}

// encode utf8 the same way as length is computed in encodedUtf8Length
// adapted from
// https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L1016
/**
 * Writes {@code in} to {@code output} as UTF-8, one byte at a time. An unpaired surrogate is
 * written as a single {@code ?} so that the number of bytes written matches the length computed
 * by {@code encodedUtf8Length}.
 */
private static void encodeUtf8(CodedOutputStream output, String in) throws IOException {
  final int charCount = in.length();
  int pos = 0;

  // Fast path for the leading run of ASCII chars.
  // Designed to take advantage of
  // https://wiki.openjdk.java.net/display/HotSpotInternals/RangeCheckElimination
  while (pos < charCount) {
    char ascii = in.charAt(pos);
    if (ascii >= 0x80) {
      break;
    }
    output.write((byte) ascii);
    pos++;
  }

  // General path, starting at the first non-ASCII char (if any).
  for (; pos < charCount; pos++) {
    char ch = in.charAt(pos);

    if (ch < 0x80) {
      // 1 byte, 7 bits
      output.write((byte) ch);
      continue;
    }

    if (ch < 0x800) {
      // 11 bits, two UTF-8 bytes
      byte lead = (byte) ((0xF << 6) | (ch >>> 6));
      byte trail = (byte) (0x80 | (0x3F & ch));
      output.write(lead);
      output.write(trail);
      continue;
    }

    if (!Character.isSurrogate(ch)) {
      // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
      byte lead = (byte) ((0xF << 5) | (ch >>> 12));
      byte middle = (byte) (0x80 | (0x3F & (ch >>> 6)));
      byte trail = (byte) (0x80 | (0x3F & ch));
      output.write(lead);
      output.write(middle);
      output.write(trail);
      continue;
    }

    int codePoint = Character.codePointAt(in, pos);
    if (codePoint == ch) {
      // invalid sequence: the surrogate is not part of a well-formed pair
      output.write((byte) '?');
      continue;
    }

    // Minimum code point represented by a surrogate pair is 0x10000, 17 bits,
    // four UTF-8 bytes
    byte first = (byte) ((0xF << 4) | (codePoint >>> 18));
    byte second = (byte) (0x80 | (0x3F & (codePoint >>> 12)));
    byte third = (byte) (0x80 | (0x3F & (codePoint >>> 6)));
    byte fourth = (byte) (0x80 | (0x3F & codePoint));
    output.write(first);
    output.write(second);
    output.write(third);
    output.write(fourth);
    // the second char of the pair has been consumed as well
    pos++;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
*/
public final class MarshalerContext {
private final boolean marshalStringNoAllocation;
private final boolean marshalStringUnsafe;
private final StringEncoder stringEncoder;

private int[] sizes = new int[16];
private int sizeReadIndex;
Expand All @@ -37,20 +37,25 @@ public final class MarshalerContext {
private int dataWriteIndex;

public MarshalerContext() {
this(/* marshalStringNoAllocation= */ true, /* marshalStringUnsafe= */ true);
this(/* marshalStringNoAllocation= */ true);
}

public MarshalerContext(boolean marshalStringNoAllocation, boolean marshalStringUnsafe) {
public MarshalerContext(boolean marshalStringNoAllocation) {
this.marshalStringNoAllocation = marshalStringNoAllocation;
this.marshalStringUnsafe = marshalStringUnsafe;
this.stringEncoder = StringEncoder.getInstance();
}

public MarshalerContext(boolean marshalStringNoAllocation, StringEncoder stringEncoder) {
this.marshalStringNoAllocation = marshalStringNoAllocation;
this.stringEncoder = stringEncoder;
}

public boolean marshalStringNoAllocation() {
return marshalStringNoAllocation;
}

public boolean marshalStringUnsafe() {
return marshalStringUnsafe;
public StringEncoder getStringEncoder() {
return stringEncoder;
}

public void addSize(int size) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public void writeString(
output.writeUInt32NoTag(field.getTag());
output.writeUInt32NoTag(utf8Length);

StatelessMarshalerUtil.writeUtf8(output, string, utf8Length, context);
context.getStringEncoder().writeUtf8(output, string, utf8Length);
}

@Override
Expand Down
Loading
Loading