diff --git a/java-scala-spark4/.gitignore b/java-scala-spark4/.gitignore new file mode 100644 index 0000000000..d09c9c692e --- /dev/null +++ b/java-scala-spark4/.gitignore @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Eclipse files +.metadata/ +.classpath +.project +.settings/ + +# Maven build artifacts +target/ +dependency-reduced-pom.xml + +# Gradle build artifacts +.gradle +build +gradle/wrapper/*.jar + +# IntelliJ +*.ipr +*.iws +*.iml +.idea/ +classes +**/out diff --git a/java-scala-spark4/.scalafmt.conf b/java-scala-spark4/.scalafmt.conf new file mode 100644 index 0000000000..dc8ad3976b --- /dev/null +++ b/java-scala-spark4/.scalafmt.conf @@ -0,0 +1,23 @@ +style = defaultWithAlign + +align = false +binPack { + literalArgumentLists = true + parentConstructors = true +} +docstrings = JavaDoc +lineEndings = unix +maxColumn = 100 +newlines { + alwaysBeforeTopLevelStatements = true + penalizeSingleSelectMultiArgList = false +} +rewrite.rules = [ + avoidinfix, + expandimportselectors, + prefercurlyfors, +] +spaces { + inImportCurlyBraces = false +} +unindentTopLevelOperators = true \ No newline at end of file diff --git a/java-scala-spark4/README.adoc b/java-scala-spark4/README.adoc new file mode 100644 index 0000000000..e37846fb7c --- /dev/null +++ b/java-scala-spark4/README.adoc @@ -0,0 +1,169 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + += Kudu Java Developer Documentation + +== System Requirements + +- Runtime + * Java 8+ +- Build time + * Java 8+ +- Test time + * Built Kudu Binaries + * MIT Kerberos (krb5) + +== Building Everything + +[source,bash] +---- +$ ./gradlew assemble +---- + +== Building Just the Client + +[source,bash] +---- +$ ./gradlew :kudu-client:assemble +---- + +The client jar can then be found at `kudu-client/build/libs`. + +== Running the Tests + +The tests may locate the master and tablet server +binaries by looking in `build/latest/bin` from the root of +the git repository. 
If you have recently built the C++ code +for Kudu, those should be present already. + +If for some reason the binaries aren't in the expected location +as shown above, you can pass +`-DkuduBinDir=/path/to/directory`. + +Once everything is setup correctly, run: + +[source,bash] +---- +$ ./gradlew test +---- + +If you would like to force tests that were already run to re-run +you can pass `-PrerunTests`. Another option is to run + +[source,bash] +---- +$ ./gradlew cleanTest test +---- + +The difference is that the latter spends extra time re-building test state, +while the former runs them again. Using -PrerunTests can be useful to quickly +loop on tests with less slowdown. See +https://docs.gradle.org/5.6.4/userguide/java_testing.html#sec:forcing_java_tests_to_run[here] +for more details. + +If you would like to run a subset of the tests or a single test +you can pass the Gradle `--tests ` argument to filter +the tests ran. +See https://docs.gradle.org/current/userguide/java_testing.html#test_filtering[here] +for detailed documentation of all pattern options. + +Single Test Class Example: + +[source,bash] +---- +$ ./gradlew :kudu-client:test --tests org.apache.kudu.TestColumnSchema +---- + +Single Test Method Example: + +[source,bash] +---- +$ ./gradlew :kudu-client:test --tests org.apache.kudu.TestColumnSchema.testEquals +---- + +Pattern Example: + +[source,bash] +---- +$ ./gradlew test --tests *IT* +---- + +== Additional Gradle Commands + +=== Discovering Other Gradle Tasks + +[source,bash] +---- +$ ./gradlew tasks +---- + +=== Clearing Build State + +[source,bash] +---- +$ ./gradlew clean +---- + +Note: You generally don't need to run this task, but it can be useful +to rule out any build issues. + +=== Installing Artifacts to the Local Maven Repository + +[source,bash] +---- +$ ./gradlew install +---- + +=== Checking for Dependency Updates + +[source,bash] +---- +./gradlew dependencyUpdates +---- + +NOTE: Additional Gradle command line flag options can be found +https://docs.gradle.org/current/userguide/command_line_interface.html[here]. + +== Tips for IDEs + +=== IntelliJ + +Follow the standard instructions as laid out +https://www.jetbrains.com/help/idea/gradle.html#gradle_import[here] +to import the Gradle project. + +For the most consistent behavior on the command line and +in the IDE, be sure to enable `Delegate IDE build/run actions to gradle` +and run tests using the `Gradle Test Runner` as described +https://www.jetbrains.com/help/idea/gradle.html#delegate_build_gradle[here]. + +Some of the classes generated by Kudu Protobuf files are larger than the +default "intellisense" limit in Intellij. This means Intellij won't +provide code assistance for the classes in those files and may indicate +that the classes in those files are not found. Follow the instructions +https://intellij-support.jetbrains.com/hc/en-us/articles/206544869-Configuring-JVM-options-and-platform-properties[here] +to set "idea.max.intellisense.filesize=5000" in the `idea.properties` file +to work around this issue. + +=== Eclipse + +Follow the instructions as laid out +http://www.vogella.com/tutorials/EclipseGradle/article.html#eclipse-gradle-support[here] +to install the Eclipse Gradle (Buildship) tooling. +Then follow the instruction on the same page +http://www.vogella.com/tutorials/EclipseGradle/article.html#import-an-existing-gradle-project[here] +to import an existing project. 
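+
+== Putting It Together
+
+For convenience, the test-related options described above can be combined in a single
+invocation. A minimal sketch (the `-DkuduBinDir` path is only a placeholder for wherever
+your Kudu binaries actually live):
+
+[source,bash]
+----
+$ ./gradlew :kudu-client:test --tests '*TestColumnSchema*' -DkuduBinDir=/path/to/kudu/build/latest/bin -PrerunTests
+----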
diff --git a/java-scala-spark4/build.gradle b/java-scala-spark4/build.gradle new file mode 100644 index 0000000000..4fa917e7d2 --- /dev/null +++ b/java-scala-spark4/build.gradle @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.kudu.gradle.DistTestTask + +// This file is the entry-point for the gradle build and contains +// common logic for the various subprojects in the build. +// Plugins and scripts are applied in the natural "build order" +// they are used to ensure there are no dependency issues. + +// Plugins and scripts applied at the root level only, instead of per module. +apply plugin: "idea" +apply plugin: "eclipse" +apply plugin: "org.barfuin.gradle.jacocolog" +apply from: "$rootDir/gradle/properties.gradle" +apply from: "$rootDir/gradle/dependencies.gradle" +apply from: "$rootDir/gradle/wrapper.gradle" + +allprojects { + // These are common to all projects, including this + // top level parent project. + repositories { + mavenCentral() + mavenLocal() + } + + // Read the version.txt file to set the project version + project.version = file("$rootDir/../version.txt").text.trim() + + apply from: "$rootDir/gradle/docs.gradle" +} + +subprojects { + // These are common to all subprojects. However, subprojects may + // include their own plugins and scripts as well. + apply plugin: "java" + apply from: "$rootDir/gradle/scopes.gradle" + apply from: "$rootDir/gradle/compile.gradle" + apply from: "$rootDir/gradle/tests.gradle" + apply from: "$rootDir/gradle/quality.gradle" + apply from: "$rootDir/gradle/artifacts.gradle" + apply from: "$rootDir/gradle/publishing.gradle" + + // Ignore the transitive annotations libraries that are + // not marked as optional in Guava version 22.0+. + // See https://github.com/google/guava/issues/2824 + configurations.compile { + exclude group: "com.google.errorprone", module: "error_prone_annotations" + exclude group: "com.google.code.findbugs", module: "jsr305" + exclude group: "com.google.j2objc", module: "j2objc-annotations" + exclude group: "org.checkerframework", module: "checker-compat-qual" + exclude group: "org.codehaus.mojo", module: "animal-sniffer-annotations" + } + + sourceSets { + all { + configurations.all { conf -> + attributes.attribute(Attribute.of("org.gradle.jvm.environment", String), "standard-jvm") + } + } + } +} + +// A task that does nothing, but is useful to ensure the Gradle build and tasks are initialized. +task initializeTasks() { + doLast { + println("Initialized Gradle tasks") + } +} + +task javadocAggregate(type: Javadoc, group: "Documentation") { + description = "Generates Aggregate Javadoc API documentation for the main source code." 
+ source subprojects.collect { it.sourceSets.main.allJava } + classpath = files(subprojects.collect { it.sourceSets.main.compileClasspath }) + destinationDir = file("${buildDir}/docs/javadoc") +} + +// Copies all the dependency jars locally so that we can reference +// them inside the project structure while running the distributed +// tests instead of in the gradle cache which is in the users home +// directory by default. +task copyDistTestJars(type: Copy) { + into "$buildDir/jars/" + from subprojects.collect { + it.configurations.testRuntime + } + from subprojects.collect { + it.configurations.provided + } +} + +// Task called by dist_test.py to generate the needed .isolate and .gen.json +// files needed to run the distributed tests. +task distTest(type: DistTestTask, dependsOn: copyDistTestJars) { + subprojects.each { + it.tasks.withType(Test).each { + addTestTask it + } + } +} diff --git a/java-scala-spark4/buildSrc/build.gradle b/java-scala-spark4/buildSrc/build.gradle new file mode 100644 index 0000000000..28f03f1ec5 --- /dev/null +++ b/java-scala-spark4/buildSrc/build.gradle @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains the dependencies required for the gradle build itself. + +repositories { + mavenCentral() + jcenter() + maven { url "https://clojars.org/repo" } // Only used for the clojure plugin below. + maven { url "https://plugins.gradle.org/m2/" } +} + +// Manage plugin dependencies since the plugin block can't be used in included build scripts yet. +// For more details see: https://docs.gradle.org/current/userguide/plugins.html#plugins_dsl_limitations +dependencies { + compile "com.github.ben-manes:gradle-versions-plugin:0.41.0" + compile "com.github.jengelman.gradle.plugins:shadow:6.1.0" + compile "gradle.plugin.org.barfuin.gradle.jacocolog:gradle-jacoco-log:1.2.4" + compile "gradle.plugin.com.google.gradle:osdetector-gradle-plugin:1.7.0" + compile "com.google.protobuf:protobuf-gradle-plugin:0.8.18" + compile "com.netflix.nebula:nebula-clojure-plugin:10.1.1" + compile "gradle.plugin.com.github.spotbugs.snom:spotbugs-gradle-plugin:4.7.0" + compile "gradle.plugin.cn.bestwu.gradle:propdeps-plugin:0.0.10" + compile "net.ltgt.gradle:gradle-errorprone-plugin:2.0.2" + compile "ru.vyarus:gradle-animalsniffer-plugin:1.5.4" + compile "com.google.code.gson:gson:2.8.9" + compile "cz.alenkacz:gradle-scalafmt:1.14.0" + compile "com.google.guava:guava:31.0.1-jre" + compile "me.champeau.gradle:jmh-gradle-plugin:0.5.3" +} + +// Compiler configuration +tasks.withType(GroovyCompile) { + options.compilerArgs << '-proc:none' // Ignore leaked annotation processors on the compile classpath. 
+} diff --git a/java-scala-spark4/buildSrc/src/main/groovy/org/apache/kudu/gradle/DistTestTask.java b/java-scala-spark4/buildSrc/src/main/groovy/org/apache/kudu/gradle/DistTestTask.java new file mode 100644 index 0000000000..8b7065fdc6 --- /dev/null +++ b/java-scala-spark4/buildSrc/src/main/groovy/org/apache/kudu/gradle/DistTestTask.java @@ -0,0 +1,337 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package org.apache.kudu.gradle; + +import org.gradle.api.DefaultTask; +import org.gradle.api.file.FileCollection; +import org.gradle.api.file.FileTree; +import org.gradle.api.internal.tasks.testing.TestClassProcessor; +import org.gradle.api.internal.tasks.testing.TestClassRunInfo; +import org.gradle.api.internal.tasks.testing.TestResultProcessor; +import org.gradle.api.internal.tasks.testing.detection.DefaultTestClassScanner; +import org.gradle.api.internal.tasks.testing.detection.TestFrameworkDetector; +import org.gradle.api.logging.Logger; +import org.gradle.api.logging.Logging; +import org.gradle.api.tasks.Input; +import org.gradle.api.tasks.InputFiles; +import org.gradle.api.tasks.OutputDirectory; +import org.gradle.api.tasks.TaskAction; +import org.gradle.api.tasks.options.Option; +import org.gradle.api.tasks.testing.Test; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.io.Files; +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; +import com.google.gson.GsonBuilder; + +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.InputStream; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * This task is used in our top build.gradle file. It is called + * by dist_test.py to generate the needed .isolate and .gen.json + * files needed to run the distributed tests. + */ +public class DistTestTask extends DefaultTask { + private static final Logger LOGGER = Logging.getLogger(DistTestTask.class); + + private static final Gson GSON = new GsonBuilder() + .setPrettyPrinting() + .create(); + + String distTestBin = getProject().getRootDir() + "/../build-support/dist_test.py"; + + @OutputDirectory + File outputDir = new File(getProject().getBuildDir(), "dist-test"); + + private List testTasks = Lists.newArrayList(); + + private boolean collectTmpDir = false; + + /** + * Called by build.gradle to add test tasks to be considered for dist-tests. 
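+   * Each registered task later contributes its candidate test classes when the
+   * task action generates the per-test .isolate and .gen.json files.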
+ */ + public void addTestTask(Test t) { + testTasks.add(t); + } + + @Option(option = "classes", + description = "Sets test class to be included, '*' is supported.") + public DistTestTask setClassPattern(List classPattern) { + for (Test t : testTasks) { + // TODO: this requires a glob like **/*Foo* instead of just *Foo* + t.setIncludes(classPattern); + } + return this; + } + + /** + * Not actually used, but gradle mandates that the @Input annotation be placed + * on a getter, and we need @Input so that the task is rerun if the value of + * the 'collect-tmpdir' option changes. + */ + @Input + public boolean getCollectTmpDir() { + return collectTmpDir; + } + + @Option(option = "collect-tmpdir", + description = "Archives the test's temp directory as an artifact if the test fails.") + public DistTestTask setCollectTmpdir() { + collectTmpDir = true; + return this; + } + + @InputFiles + public FileCollection getInputClasses() { + FileCollection fc = getProject().files(); // Create an empty FileCollection. + for (Test t : testTasks) { + fc = fc.plus(t.getCandidateClassFiles()); + } + return fc; + } + + @TaskAction + public void doStuff() throws IOException { + getProject().delete(outputDir); + getProject().mkdir(outputDir); + List baseDeps = getBaseDeps(); + for (Test t : testTasks) { + List testClassNames = collectTestNames(t); + for (String c : testClassNames) { + File isolateFile = new File(outputDir, c + ".isolate"); + File isolatedFile = new File(outputDir, c + ".isolated"); + File genJsonFile = new File(outputDir, c + ".gen.json"); + Files.asCharSink(isolateFile, UTF_8).write(genIsolate(outputDir.toPath(), t, c, baseDeps)); + + // Write the gen.json + GenJson gen = new GenJson(); + gen.args = ImmutableList.of( + "-i", isolateFile.toString(), + "-s", isolatedFile.toString()); + gen.dir = outputDir.toString(); + gen.name = c; + Files.asCharSink(genJsonFile, UTF_8).write(GSON.toJson(gen)); + } + } + } + + /** + * Calls dist_test.py to get the c++ "base" dependencies so that we can + * include them in the .isolate files. + * + * Note: This currently fails OSX because dump_base_deps use ldd. + */ + private List getBaseDeps() throws IOException { + Process proc = new ProcessBuilder(distTestBin, + "internal", + "dump_base_deps") + .redirectError(ProcessBuilder.Redirect.INHERIT) + .start(); + + try (InputStream is = proc.getInputStream()) { + return new Gson().fromJson(new InputStreamReader(is, UTF_8), + new TypeToken>(){}.getType()); + } + } + + /** + * @return all test result reporting environment variables and their values, + * in a format suitable for consumption by run_dist_test.py. + */ + private List getTestResultReportingEnvironmentVariables() { + ImmutableList.Builder args = new ImmutableList.Builder<>(); + String enabled = System.getenv("KUDU_REPORT_TEST_RESULTS"); + if (enabled != null && Integer.parseInt(enabled) > 0) { + for (String ev : ImmutableList.of("KUDU_REPORT_TEST_RESULTS", + "BUILD_CONFIG", + "BUILD_TAG", + "GIT_REVISION", + "TEST_RESULT_SERVER")) { + String evValue = System.getenv(ev); + if (evValue == null || evValue.isEmpty()) { + if (ev.equals("TEST_RESULT_SERVER")) { + // This one is optional. 
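+            // Just skip it here instead of failing like the required variables handled below.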
+ continue; + } + throw new RuntimeException( + String.format("Required env variable %s is missing", ev)); + } + args.add("-e"); + args.add(String.format("%s=%s", ev, evValue)); + } + } + return args.build(); + } + + private String genIsolate(Path isolateFileDir, Test test, String testClass, + List baseDeps) throws IOException { + Path rootDir = test.getProject().getRootDir().toPath(); + Path binDir = rootDir.resolve("../build/latest/bin").toRealPath(); + Path buildSupportDir = rootDir.resolve("../build-support").toRealPath(); + Path buildDir = rootDir.resolve("build"); + File jarDir = buildDir.resolve("jars").toFile(); + + // Build classpath with relative paths. + List classpath = Lists.newArrayList(); + for (File f : test.getClasspath().getFiles()) { + File projectFile = f; + // This hack changes the path to dependent jars from the gradle cache + // in ~/.gradle/caches/... to a path to the jars copied under the project + // build directory. See the copyDistTestJars task in build.gradle to see + // the copy details. + if (projectFile.getAbsolutePath().contains(".gradle/caches/")) { + projectFile = new File(jarDir, projectFile.getName()); + } + + String s = isolateFileDir.relativize(projectFile.toPath().toAbsolutePath()).toString(); + // Isolate requires that directories be listed with a trailing '/'. + if (projectFile.isDirectory()) { + s += "/"; + } + // Gradle puts resources directories into the classpath even if they don't exist. + // isolate is unhappy with non-existent paths, though. + if (projectFile.exists()) { + classpath.add(s); + } + } + + // Build up the actual Java command line to run the test. + ImmutableList.Builder cmd = new ImmutableList.Builder<>(); + cmd.add(isolateFileDir.relativize(buildSupportDir.resolve("run_dist_test.py")).toString()); + if (collectTmpDir) { + cmd.add("--collect-tmpdir"); + } + cmd.add("--test-language=java"); + cmd.addAll(getTestResultReportingEnvironmentVariables()); + cmd.add("--", + "-ea", + "-cp", + Joiner.on(":").join(classpath)); + for (Map.Entry e : test.getSystemProperties().entrySet()) { + cmd.add("-D" + e.getKey() + "=" + e.getValue()); + } + cmd.add("-DkuduBinDir=" + isolateFileDir.relativize(binDir), + "org.junit.runner.JUnitCore", + testClass); + + // Output the actual JSON. + IsolateFileJson isolate = new IsolateFileJson(); + isolate.variables.command = cmd.build(); + isolate.variables.files.addAll(classpath); + for (String s : baseDeps) { + File f = new File(s); + String path = isolateFileDir.relativize(f.toPath().toAbsolutePath()).toString(); + if (f.isDirectory()) { + path += "/"; + } + isolate.variables.files.add(path); + } + + String json = isolate.toJson(); + + // '.isolate' files are actually Python syntax, rather than true JSON. + // However, the two are close enough that just doing this replacement + // tends to work (we're assuming that no one has a quote character in a + // file path or system property. + return json.replace('"', '\''); + } + + // This is internal API but required to get the filtered list of test classes and process them. 
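+  // Because it relies on internal API, this may need adjusting whenever the Gradle version used by the build changes.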
+ // See the gradle code here which was used for reference: + // https://github.com/gradle/gradle/blob/c2067eaa129af4c9c29ad08da39d1c853eec4c59/subprojects/testing-jvm/src/main/java/org/gradle/api/internal/tasks/testing/detection/DefaultTestExecuter.java#L104-L112 + private List collectTestNames(Test testTask) { + ClassNameCollectingProcessor processor = new ClassNameCollectingProcessor(); + Runnable detector; + final FileTree testClassFiles = testTask.getCandidateClassFiles(); + if (testTask.isScanForTestClasses()) { + TestFrameworkDetector testFrameworkDetector = testTask.getTestFramework().getDetector(); + testFrameworkDetector.setTestClasses(testTask.getTestClassesDirs().getFiles()); + testFrameworkDetector.setTestClasspath(testTask.getClasspath().getFiles()); + detector = new DefaultTestClassScanner(testClassFiles, testFrameworkDetector, processor); + } else { + detector = new DefaultTestClassScanner(testClassFiles, null, processor); + } + detector.run(); + LOGGER.debug("collected test class names: {}", processor.classNames); + return processor.classNames; + } + + private static class ClassNameCollectingProcessor implements TestClassProcessor { + public List classNames = new ArrayList<>(); + + @Override + public void startProcessing(TestResultProcessor testResultProcessor) { + // no-op + } + + @Override + public void processTestClass(TestClassRunInfo testClassRunInfo) { + classNames.add(testClassRunInfo.getTestClassName()); + } + + @Override + public void stop() { + // no-op + } + + @Override + public void stopNow() { + // no-op + } + } + + /** + * Structured to generate Json that matches the expected .isolate format. + * See here for a description of the .isolate format: + * https://github.com/cloudera/dist_test/blob/master/grind/python/disttest/isolate.py + */ + private static class IsolateFileJson { + private static class Variables { + public List files = new ArrayList<>(); + public List command; + } + Variables variables = new Variables(); + + public String toJson() { + return GSON.toJson(this); + } + } + + /** + * Structured to generate Json that matches the expected .gen.json contents. 
+   * See here for a description of the .gen.json contents:
+   * https://github.com/cloudera/dist_test/blob/master/grind/python/disttest/isolate.py
+   */
+  private static class GenJson {
+    int version = 1;
+    String dir;
+    List<String> args;
+    String name;
+  }
+}
diff --git a/java-scala-spark4/config/checkstyle/checkstyle.xml b/java-scala-spark4/config/checkstyle/checkstyle.xml
new file mode 100644
index 0000000000..b0ef88f5b8
--- /dev/null
+++ b/java-scala-spark4/config/checkstyle/checkstyle.xml
@@ -0,0 +1,303 @@
[303 added lines of Checkstyle rule configuration omitted]
diff --git a/java-scala-spark4/config/checkstyle/suppressions.xml b/java-scala-spark4/config/checkstyle/suppressions.xml
new file mode 100644
index 0000000000..86c7d98926
--- /dev/null
+++ b/java-scala-spark4/config/checkstyle/suppressions.xml
@@ -0,0 +1,28 @@
[28 added lines of Checkstyle suppressions omitted]
\ No newline at end of file
diff --git a/java-scala-spark4/config/spotbugs/excludeFilter.xml b/java-scala-spark4/config/spotbugs/excludeFilter.xml
new file mode 100644
index 0000000000..11da3f9978
--- /dev/null
+++ b/java-scala-spark4/config/spotbugs/excludeFilter.xml
@@ -0,0 +1,378 @@
[378 added lines of SpotBugs exclude-filter rules omitted]
diff --git a/java-scala-spark4/gradle.properties b/java-scala-spark4/gradle.properties
new file mode 100644
index 0000000000..b490f9fb5e
--- /dev/null
+++ b/java-scala-spark4/gradle.properties
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file contains project properties.
+# More about how to use the gradle.properties file can be read here:
+# https://docs.gradle.org/current/userguide/build_environment.html#sec:gradle_configuration_properties
+
+group = org.apache.kudu
+url = https://kudu.apache.org/
+
+# The Maven repository used when uploading artifacts.
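+# It can be overridden per invocation (for example with -PmavenPublishUrl=...), since
+# gradle/publishing.gradle resolves it through propertyWithDefault.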
+mavenPublishUrl = https://repository.apache.org/service/local/staging/deploy/maven2 + +# Version passed to javac's -source, -target, and --release (Java 9+) parameters: +# +# -source Specifies the version of source code accepted. +# -target Generate class files that target a specified version of the VM. Class +# files will run on the specified target and on later versions, but not +# on earlier versions of the VM. +# --release (Java 9+) Configures the compiler to produce class files that will +# link against an implementation of the given platform version. +# (See http://openjdk.java.net/jeps/247) +javaCompatibility = 8 + +encoding = UTF-8 +# Used by the scalafmt plugin because the encoding property can't be passed. +systemProp.file.encoding = UTF-8 + +# gpg-agent configuration for artifact signing. +# See https://docs.gradle.org/current/userguide/signing_plugin.html#sec:using_gpg_agent +signing.gnupg.executable = gpg +signing.gnupg.useLegacyGpg = true + +# Maximum parallel forks to use while unit testing. +maxParallelForks = 1 + +# Warn about deprecated gradle usage +org.gradle.warning.mode = summary + +# Flags to speed up the gradle build. +# https://guides.gradle.org/performance/ +# https://docs.gradle.org/current/userguide/build_environment.html +org.gradle.daemon = true +org.gradle.parallel = true +#org.gradle.caching = true + +# The below parameters are used in the `gradle wrapper` to generate `gradlew` +# and `gradle-wrapper.properties` +gradleDistUrlBase = https://services.gradle.org/distributions + +# Leverages the wrapper jar checked into the gradle project on github because +# the jar isn't available elsewhere. Using raw.githubusercontent.com instead of +# github.com because github.com servers deprecated TLSv1/TLSv1.1 support some +# time ago, so older versions of curl (built against OpenSSL library that +# doesn't support TLSv1.2) would fail to fetch the jar. +# This jar doesn't change frequently and doesn't need to be updated to match +# the Gradle version to function properly. +wrapperBaseUrl = https://raw.githubusercontent.com/gradle/gradle/v6.8.3/gradle/wrapper + +# Increase the default Java heap size for the build to avoid OOM issues. +# https://docs.gradle.org/current/userguide/build_environment.html#sec:configuring_jvm_memory +org.gradle.jvmargs=-Xmx1g -Dfile.encoding=UTF-8 diff --git a/java-scala-spark4/gradle/artifacts.gradle b/java-scala-spark4/gradle/artifacts.gradle new file mode 100644 index 0000000000..844fdf8383 --- /dev/null +++ b/java-scala-spark4/gradle/artifacts.gradle @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains common tasks and configuration for artifact generation. + +// Create a configuration so that the test jar can be referenced in other modules. 
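+// A consuming module can then declare something along the lines of
+//   testCompile project(path: ":kudu-client", configuration: "test")
+// (the module name and configuration above are only an illustration).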
+configurations.create("test") + +task testJar(type: Jar, dependsOn: testClasses, group: "Build") { + description = "Assembles a jar archive containing the test classes." + from sourceSets.test.output + classifier = "tests" + extension "jar" +} + +task sourcesJar(type: Jar, dependsOn: classes, group: "Build") { + description = "Assembles a jar archive containing the main source." + from sourceSets.main.allSource + classifier "sources" + extension "jar" +} + +task testSourcesJar(type: Jar, dependsOn: testJar, group: "Build") { + description = "Assembles a jar archive containing the test source." + from sourceSets.test.allSource + classifier "test-sources" + extension "jar" +} + +task javadocJar(type: Jar, dependsOn: javadoc, group: "Build") { + description = "Assembles a jar archive containing the javadoc." + from javadoc.destinationDir + classifier "javadoc" + extension "jar" +} + +tasks.withType(Jar) { + // Configure the manifest content of the Jars. + manifest { + attributes "Implementation-Title": "$project.archivesBaseName" + attributes "Implementation-Version": "$project.version" + } + from ("$rootDir/../NOTICE.txt") { + into "META-INF" + } + from ("$rootDir/../LICENSE.txt") { + into "META-INF" + } + // Prevent empty directories in the Jars. + includeEmptyDirs = false +} + +artifacts { + test testJar + archives testJar + archives sourcesJar + archives testSourcesJar + archives javadocJar +} \ No newline at end of file diff --git a/java-scala-spark4/gradle/benchmarks.gradle b/java-scala-spark4/gradle/benchmarks.gradle new file mode 100644 index 0000000000..3de899d5ad --- /dev/null +++ b/java-scala-spark4/gradle/benchmarks.gradle @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains common tasks and configuration for benchmarking. + +apply plugin: "me.champeau.gradle.jmh" + +// Add the JMH plugin dependencies. +dependencies { + jmh libs.jmhCore + jmh libs.jmhGenerator +} + +// Configure the JMH plugin. +jmh { + duplicateClassesStrategy = 'exclude' + zip64 = true +} diff --git a/java-scala-spark4/gradle/compile.gradle b/java-scala-spark4/gradle/compile.gradle new file mode 100644 index 0000000000..896b24a404 --- /dev/null +++ b/java-scala-spark4/gradle/compile.gradle @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains common compiler configurations. + +// JVM based compiler configuration +tasks.withType(AbstractCompile) { + sourceCompatibility = "1.$javaCompatibility" + targetCompatibility = "1.$javaCompatibility" + // --release is the recommended way to select the target release, but it's only supported in + // Java 9+ so we also set -source and -target via `sourceCompatibility` and `targetCompatibility`. + // If/when Gradle supports `--release` natively (https://github.com/gradle/gradle/issues/2510), + // we should switch to that. + if (JavaVersion.current().isJava9Compatible()) { + options.compilerArgs << "--release" << "$javaCompatibility" + } + options.encoding = encoding // make sure the encoding is defined by the project and not the system default. + options.incremental = true // enable incremental compilation. + options.compilerArgs << '-proc:none' // Ignore leaked annotation processors on the compile classpath. +} + +// Scala compiler configuration +tasks.withType(ScalaCompile) { + scalaCompileOptions.encoding = encoding // make sure the encoding is defined by the project and not the system default. + scalaCompileOptions.additionalParameters = [ + // Emit warning and location for usages of features that should be imported explicitly. + "-feature", + // Emit various static analysis warnings. + "-Xlint" + ] +} \ No newline at end of file diff --git a/java-scala-spark4/gradle/dependencies.gradle b/java-scala-spark4/gradle/dependencies.gradle new file mode 100644 index 0000000000..5686c043a7 --- /dev/null +++ b/java-scala-spark4/gradle/dependencies.gradle @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains all of the dependencies required for the build. +// Centrally locating all of the dependencies ensures each subproject +// uses the same dependency version for all dependencies used. 
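+// Subprojects reference these maps in their own dependency blocks, typically along the
+// lines of `compile libs.guava` or `testCompile libs.junit` (configuration names illustrative).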
+ +ext { + versions = [:] + libs = [:] +} + +versions += [ + async : "1.4.1", + checkstyle : "8.36.1", + clojure : "1.10.3", + clojureToolsCli: "1.0.206", + commonsIo : "2.11.0", + errorProne : "2.3.3", + errorProneJavac: "9+181-r4173-1", + gradle : "6.9.2", + guava : "32.1.1-jre", + hadoop : "3.3.1", + hamcrest : "2.2", + hdrhistogram : "2.1.12", + hive : "3.1.2", + httpClient : "4.5.13", + jacoco : "0.8.6", + jepsen : "0.1.5", + jetty : "9.4.44.v20210927", + jmh : "1.34", + jsr305 : "3.0.2", + junit : "4.13.2", + log4j : "2.17.1", + micrometer : "1.8.2", + mockito : "4.2.0", + murmur : "1.0.0", + netty : "4.1.110.Final", + osdetector : "1.6.2", + protobuf : "3.21.12", + ranger : "2.1.0", + scala211 : "2.11.12", + scala : "2.13.14", + scalatest : "3.2.10", + scopt : "4.0.1", + slf4j : "1.7.33", + spark2 : "2.4.8", + spark : "4.0.0-preview1", + spotBugs : "4.1.1", + yetus : "0.13.0", + scalaParallelCollections : "0.2.0" +] + +// Log the Gradle version used vs defined. +if (gradle.gradleVersion != versions.gradle) { + logger.warn("Using gradle version $gradle.gradleVersion (Build defines $versions.gradle)") +} + +// If the `spark2` property is passed, override the `spark` and `scala` version +// to use the `spark2` and `scala211` versions. +if (propertyExists("spark2")) { + versions["spark"] = "$versions.spark2" + versions["scala"] = "$versions.scala211" +} + +// Add base Scala version +versions["scalaBase"] = versions.scala.substring(0, versions.scala.lastIndexOf(".")) + +// Add base Spark version +versions["sparkBase"] = versions.spark.substring(0, versions.spark.indexOf(".")) + +libs += [ + async : "com.stumbleupon:async:$versions.async", + clojure : "org.clojure:clojure:$versions.clojure", + clojureToolsCli : "org.clojure:tools.cli:$versions.clojureToolsCli", + commonsIo : "commons-io:commons-io:$versions.commonsIo", + errorProne : "com.google.errorprone:error_prone_core:$versions.errorProne", + errorProneJavac : "com.google.errorprone:javac:$versions.errorProneJavac", + guava : "com.google.guava:guava:$versions.guava", + hadoopClient : "org.apache.hadoop:hadoop-client:$versions.hadoop", + hadoopCommon : "org.apache.hadoop:hadoop-common:$versions.hadoop", + hadoopMiniCluster : "org.apache.hadoop:hadoop-minicluster:$versions.hadoop", + hadoopMRClientCommon : "org.apache.hadoop:hadoop-mapreduce-client-common:$versions.hadoop", + hadoopMRClientCore : "org.apache.hadoop:hadoop-mapreduce-client-core:$versions.hadoop", + hamcrest : "org.hamcrest:hamcrest:$versions.hamcrest", + hdrhistogram : "org.hdrhistogram:HdrHistogram:$versions.hdrhistogram", + hiveMetastore : "org.apache.hive:hive-metastore:$versions.hive", + hiveTestUtils : "org.apache.hive:hive-testutils:$versions.hive", + httpClient : "org.apache.httpcomponents:httpclient:$versions.httpClient", + httpMime : "org.apache.httpcomponents:httpmime:$versions.httpClient", + jepsen : "jepsen:jepsen:$versions.jepsen", + jetty : "org.eclipse.jetty:jetty-server:$versions.jetty", + jettyServlet : "org.eclipse.jetty:jetty-servlet:$versions.jetty", + jmhCore : "org.openjdk.jmh:jmh-core:$versions.jmh", + jmhGenerator : "org.openjdk.jmh:jmh-generator-annprocess:$versions.jmh", + jsr305 : "com.google.code.findbugs:jsr305:$versions.jsr305", + junit : "junit:junit:$versions.junit", + log4jApi : "org.apache.logging.log4j:log4j-api:$versions.log4j", + log4jCompat : "org.apache.logging.log4j:log4j-1.2-api:$versions.log4j", + log4jCore : "org.apache.logging.log4j:log4j-core:$versions.log4j", + log4jSlf4jImpl : 
"org.apache.logging.log4j:log4j-slf4j-impl:$versions.log4j", + micrometerCore : "io.micrometer:micrometer-core:$versions.micrometer", + mockitoCore : "org.mockito:mockito-core:$versions.mockito", + murmur : "com.sangupta:murmur:$versions.murmur", + netty : "io.netty:netty-all:$versions.netty", + nettyBuffer : "io.netty:netty-buffer:$versions.netty", + nettyCommon : "io.netty:netty-common:$versions.netty", + nettyHandler : "io.netty:netty-handler:$versions.netty", + nettyTransport : "io.netty:netty-transport:$versions.netty", + nettyTransportEpoll : "io.netty:netty-transport-native-epoll:$versions.netty", + nettyTransportUnix : "io.netty:netty-transport-native-unix-common:$versions.netty", + osdetector : "com.google.gradle:osdetector-gradle-plugin:$versions.osdetector", + protobufJava : "com.google.protobuf:protobuf-java:$versions.protobuf", + protobufJavaUtil : "com.google.protobuf:protobuf-java-util:$versions.protobuf", + protoc : "com.google.protobuf:protoc:$versions.protobuf", + rangerPlugin : "org.apache.ranger:ranger-plugins-common:$versions.ranger", + scalaLibrary : "org.scala-lang:scala-library:$versions.scala", + scalap : "org.scala-lang:scalap:$versions.scala", + scalatest : "org.scalatest:scalatest_$versions.scalaBase:$versions.scalatest", + scopt : "com.github.scopt:scopt_$versions.scalaBase:$versions.scopt", + slf4jApi : "org.slf4j:slf4j-api:$versions.slf4j", + sparkAvro : "org.apache.spark:spark-avro_$versions.scalaBase:$versions.spark", + sparkCore : "org.apache.spark:spark-core_$versions.scalaBase:$versions.spark", + sparkSql : "org.apache.spark:spark-sql_$versions.scalaBase:$versions.spark", + sparkSqlTest : "org.apache.spark:spark-sql_$versions.scalaBase:$versions.spark:tests", + yetusAnnotations : "org.apache.yetus:audience-annotations:$versions.yetus", + scalaParallelCollections : "org.scala-lang.modules:scala-parallel-collections_2.13:$versions.scalaParallelCollections" +] diff --git a/java-scala-spark4/gradle/docs.gradle b/java-scala-spark4/gradle/docs.gradle new file mode 100644 index 0000000000..d9ef7cf08a --- /dev/null +++ b/java-scala-spark4/gradle/docs.gradle @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains common tasks and configuration for documentation generation. + +configurations { + doclet +} + +dependencies { + doclet libs.yetusAnnotations +} + +// Javadoc Configuration +// We use afterEvaluate to add additional configuration once all the definitions +// in the projects build script have been applied +afterEvaluate { + tasks.withType(Javadoc) { + if(JavaVersion.current().isJava10Compatible()) { + // Disable Javadoc generation on Java 10+ until it is fixed. 
+ enabled = false + } + // Configure the Yetus doclet + options.docletpath = configurations.doclet.files.asType(List) + options.doclet = "org.apache.yetus.audience.tools.IncludePublicAnnotationsStandardDoclet" + // Link to Java SE javadoc + options.links "https://docs.oracle.com/javase/$javaCompatibility/docs/api/" + // Hacky workaround to YETUS-627. Replace with options.noTimestamp + // once this workaround is no longer needed. + options.addStringOption("notimestamp", "org.apache.kudu") + } +} \ No newline at end of file diff --git a/java-scala-spark4/gradle/properties.gradle b/java-scala-spark4/gradle/properties.gradle new file mode 100644 index 0000000000..1c89214f8b --- /dev/null +++ b/java-scala-spark4/gradle/properties.gradle @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains methods to be used in the build to load and +// set build properties consistently. + +// ext makes these methods callable project wide +ext { + // A common method to handle loading gradle properties with a default when + // no definition is found. The property value is determined by the following + // priority order (top is highest priority): + // - gradle property (-Pproperty=value) + // - system property (-Dproperty=value) + // - default value + // See more details on gradle property handling here: + // https://docs.gradle.org/current/userguide/build_environment.html#sec:gradle_properties_and_system_properties + propertyWithDefault = { property, defaultValue -> + def value = defaultValue + def systemValue = System.getProperty(property) + if (systemValue != null) { + value = systemValue + } + def projectValue = project.hasProperty(property) ? project.getProperty(property) : null + if (projectValue != null) { + value = projectValue + } + logger.info("Resolved property $property with final value $value " + + "[defaultValue=$defaultValue, systemValue=$systemValue, projectValue=$projectValue]") + return value + } + + // Returns true if the property has been set, otherwise false. + propertyExists = { property -> + if (System.getProperty(property) != null || project.hasProperty(property)) { + return true + } + return false + } +} \ No newline at end of file diff --git a/java-scala-spark4/gradle/protobuf.gradle b/java-scala-spark4/gradle/protobuf.gradle new file mode 100644 index 0000000000..a82e0b92f3 --- /dev/null +++ b/java-scala-spark4/gradle/protobuf.gradle @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains the common protobuf plugin configuration. +import org.gradle.plugins.ide.eclipse.model.SourceFolder +apply plugin: "com.google.protobuf" + +// Use the maven protoc artifact instead of local. +protobuf { + protoc { + // If -PuseKuduProtoc is set, use the protoc from Kudu's thirdparty instead + // of using the protoc Maven artifacts. This is useful if a specific + // OS or architecture is not published to Maven. + if (propertyExists("useKuduProtoc")) { + logger.warn("Using Protoc from: $rootDir/../thirdparty/installed/uninstrumented/bin/protoc") + path = "$rootDir/../thirdparty/installed/uninstrumented/bin/protoc" + } else { + artifact = libs.protoc + } + } + generateProtoTasks { + all().each { task -> + task.builtins { + java { + // Tell protoc to mark generated java classes with @Generated. + option 'annotate_code' + } + } + } + } +} + +// Configure Intellij to see the generated classes. +idea { + module { + generatedSourceDirs += file("${protobuf.generatedFilesBaseDir}/main/java") + } +} + +// Configure Eclipse to see the generated classes. +eclipse { + classpath { + file { + whenMerged { + // protobuf.generatedFilesBaseDir contains absolute path to the root project. + // Here we have to make the path relative to a subproject, + // because Buildship ignores absolute paths when it evaluates classpath entries. + + // Passing 'null' as a second argument to SourceFolder constructor + // instructs Eclipse to not compile generated source files + def relPath = projectDir.toPath().relativize( + file("${protobuf.generatedFilesBaseDir}/main/java").toPath()) + entries += new SourceFolder(relPath.toString(), null) + } + } + } +} diff --git a/java-scala-spark4/gradle/publishing.gradle b/java-scala-spark4/gradle/publishing.gradle new file mode 100644 index 0000000000..9ed2d52d3b --- /dev/null +++ b/java-scala-spark4/gradle/publishing.gradle @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains common tasks and configuration for artifact publishing. + +apply plugin: "maven" +apply plugin: "signing" + +ext { + // This allows simplified builds and local maven installs. 
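+  // For example, a plain local install on a machine without GPG can be run with
+  //   ./gradlew install -PskipSigning
+  // (illustrative; -PforceSigning has the opposite effect).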
+ forceSigning = propertyExists("forceSigning") + skipSigning = propertyExists("skipSigning") + shouldSign = + // Always sign artifacts if -PforceSigning is passed. + forceSigning || + // Skip signing artifacts by default if -PskipSigning is passed. + (!skipSigning + // Sign artifacts if the version is not a snapshot, and we are uploading them to maven. + && !version.endsWith("SNAPSHOT") + && project.gradle.startParameter.taskNames.any { it.contains("upload") }) + // These properties can be set in ~/.gradle/gradle.properties file, + // though it would be open text. They can also be set on the cli via + // -PmavenUsername and -PmavenPassword. + mavenPublishUrl = propertyWithDefault("mavenPublishUrl", "") + mavenUsername = propertyWithDefault("mavenUsername", "") + mavenPassword = propertyWithDefault("mavenPassword", "") +} + +uploadArchives { + repositories { + signing { + required { shouldSign } + // Check if we are going to sign, because CI environments may not have + // gpg on their path and useGpgCmd evaluates eagerly. + if (shouldSign) { + useGpgCmd() // Use gpg-agent to sign + } + sign configurations.archives + mavenDeployer { + beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } + repository(url: "${mavenPublishUrl}") { + authentication(userName: "${mavenUsername}", password: "${mavenPassword}") + } + afterEvaluate { + pom.project { + url = "$url" + licenses { + license { + name = "The Apache Software License, Version 2.0" + url = "http://www.apache.org/licenses/LICENSE-2.0.txt" + distribution = "repo" + } + } + } + } + } + } + } +} + +// Add the install task to the "Upload" group so it's visible in the tasks output. +install.group = "Upload" + +// Sort the generated maven dependencies to make pom comparisons easier. +tasks.withType(Upload) { + def installer = install.repositories.mavenInstaller + def deployer = uploadArchives.repositories.mavenDeployer + + [installer, deployer]*.pom*.whenConfigured { pom -> + pom.dependencies = pom.dependencies.sort { dep -> + "$dep.scope:$dep.optional:$dep.groupId:$dep.artifactId" + } + } +} + + diff --git a/java-scala-spark4/gradle/quality.gradle b/java-scala-spark4/gradle/quality.gradle new file mode 100644 index 0000000000..de58c8283b --- /dev/null +++ b/java-scala-spark4/gradle/quality.gradle @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains common tasks and configuration for checking the quality of the code. + +apply plugin: "com.github.spotbugs" // Performs static code analysis to look for bugs in Java code. +apply plugin: "com.github.ben-manes.versions" // Provides a task to determine which dependencies have updates. +apply plugin: "ru.vyarus.animalsniffer" // Ensures Java code uses APIs from a particular version of Java. 
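+// The aggregate `checkstyle`, `spotbugs`, and `animalsniffer` tasks defined further down let
+// all of these checks run in one pass, e.g. `./gradlew checkstyle spotbugs animalsniffer` (illustrative).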
+ +def ignoreCheckFailures = false +if (propertyExists("ignoreCheckFailures")) { + ignoreCheckFailures = true +} + +// For other than development environments (e.g., building Kudu release JARs +// in an automated release engineering environment), it makes sense to skip +// code style verification tasks since the code is assumed to be already +// passed those in various pre-commit builds. For that, define the +// 'skipCodeStyleChecks' property. +if (!propertyExists("skipCodeStyleChecks")) { + // Ensures Java code follows the defined coding style. + apply plugin: "checkstyle" + + // Automatically formats Scala code on each build. + apply plugin: "scalafmt" + + checkstyle { + toolVersion = versions.checkstyle + configDir = file("$rootProject.projectDir/config/checkstyle") + ignoreFailures = ignoreCheckFailures + maxWarnings = 0 + showViolations = true + } + + // Create an aggregate checkstyle task. + // This simplifies running checkstyle on all the code by only needing one task instead of multiple in your command. + task checkstyle(dependsOn: [checkstyleMain, checkstyleTest], group: "Verification") { + description = "Run Checkstyle analysis." + } + + scalafmt { + configFilePath = "$rootDir/.scalafmt.conf" + } + + // Run scalafmt on compile. + tasks.withType(ScalaCompile) { + if (!propertyExists("skipFormat")) { + dependsOn("scalafmtAll") + } + } +} + +spotbugs { + toolVersion = versions.spotBugs + ignoreFailures = ignoreCheckFailures + effort = "max" + reportLevel = "low" + excludeFilter = file("$rootProject.projectDir/config/spotbugs/excludeFilter.xml") +} + +tasks.withType(com.github.spotbugs.snom.SpotBugsTask) { + reports { + xml.enabled false + html.enabled true + } +} + +// Create an aggregate spotbugs task. +// This simplifies running spotbugs on all the code by only needing one task instead of multiple in your command. +task spotbugs(dependsOn: [spotbugsMain, spotbugsTest], group: "Verification") { + description = "Run SpotBugs analysis." +} + +// Errorprone doesn't support Java 11+ +// https://github.com/google/error-prone/issues/1106 +if(!JavaVersion.current().isJava11Compatible()) { + apply plugin: "net.ltgt.errorprone" // Performs static code analysis to look for bugs in Java code. + + // Configure error-prone. + // Running with '-Derrorprone-fix=...' can instruct error-prone to automatically fix issues. + tasks.withType(JavaCompile) { + options.errorprone { + disableWarningsInGeneratedCode = true + excludedPaths = '.*/build/generated.*/.*' + allErrorsAsWarnings = false + def fix = propertyWithDefault("errorprone-fix", "") + if (!fix.isEmpty()) { + errorproneArgs += ['-XepPatchChecks:' + fix, '-XepPatchLocation:IN_PLACE'] + } + } + } + dependencies { + // Set a specific version of Error Prone + errorprone libs.errorProne + // Configure Error Prone to use it's own static javac as described here: + // https://github.com/tbroyer/gradle-errorprone-plugin#jdk-8-support + errorproneJavac libs.errorProneJavac + } +} + +// Don't enable code coverage for kudu-proto given it is exclusively generated code. +// Don't enable code coverage for kudu-jepsen given it is exclusively test code. +if (project.name != "kudu-proto" && project.name != "kudu-jepsen") { + apply plugin: "jacoco" // Provides code coverage metrics for Java code. 
+ jacoco { + toolVersion = versions.jacoco + } + tasks.withType(JacocoReport) { + reports { + csv.enabled false + xml.enabled true + html.enabled true + } + } + jacocoTestReport { + dependsOn test // tests are required to run before generating the report + } +} + +// Define a Java API signature for use by animal-sniffer. It'll enforce that all +// Java API usage adheres to this signature. +dependencies { + signature "org.codehaus.mojo.signature:java1$javaCompatibility:1.0@signature" +} + +// Create an aggregate animal-sniffer task. +// This simplifies running animal-sniffer on all the code by only needing one task instead of multiple in your command. +task animalsniffer(dependsOn: [animalsnifferMain, animalsnifferTest], group: "Verification") { + description = "Run animal-sniffer analysis." +} + +// Configure the versions plugin to only show dependency updates for released versions. +dependencyUpdates { + gradleReleaseChannel = "current" + revision = "release" + resolutionStrategy { + componentSelection { rules -> + rules.all { ComponentSelection selection -> + boolean rejected = ["snap", "alpha", "beta", "rc", "cr", "m"].any { qualifier -> + selection.candidate.version ==~ /(?i).*[.-]${qualifier}[.\d-].*/ + } + if (rejected) { + selection.reject("Release candidate") + } + } + } + } +} diff --git a/java-scala-spark4/gradle/scopes.gradle b/java-scala-spark4/gradle/scopes.gradle new file mode 100644 index 0000000000..84a94a3d8f --- /dev/null +++ b/java-scala-spark4/gradle/scopes.gradle @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file adds provided scope and optional maven support. + +apply plugin: "cn.bestwu.propdeps" +apply plugin: "cn.bestwu.propdeps-eclipse" +apply plugin: "cn.bestwu.propdeps-idea" +apply plugin: "cn.bestwu.propdeps-maven" diff --git a/java-scala-spark4/gradle/shadow.gradle b/java-scala-spark4/gradle/shadow.gradle new file mode 100644 index 0000000000..9eb6cfd119 --- /dev/null +++ b/java-scala-spark4/gradle/shadow.gradle @@ -0,0 +1,161 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains tasks and configuration to support shading dependencies +// consistently when a subproject requires shaded artifacts. + +import org.gradle.api.internal.artifacts.publish.ArchivePublishArtifact + +apply plugin: "com.github.johnrengelman.shadow" + +knows.enabled = false // Disable the "easter egg" knows task. +knows.group = "" // Hide the "easter egg" knows task. +shadowJar.group = "" // Hide shadowJar task since it's used by the default build. + +// Add a property to explicitly allow slf4j shading. +ext { + shadowIncludeSlf4j = false +} + +// Configure a shaded jar to replace the default jar +shadowJar.classifier = null // Configure shadow jar to have the default classifier. +jar.finalizedBy(shadowJar) // Generate the shaded jar anytime the jar task is run. +jar.classifier = "unshaded" // Add an unshaded classifier to the default jar. + +// Add the shadowJar to the published artifacts. +artifacts { + archives shadowJar +} + +// Remove the unshaded jar from the published artifacts. +configurations.archives.artifacts.removeAll { + it instanceof ArchivePublishArtifact && it.archiveTask == jar +} + +// Ensure we always relocate these shaded dependencies to the same +// location across all modules. +shadowJar { + relocate "com.google.common", "org.apache.kudu.shaded.com.google.common" + relocate "com.google.gradle.osdetector", "org.apache.kudu.shaded.com.google.gradle.osdetector" + relocate "com.google.gson", "org.apache.kudu.shaded.com.google.gson" + relocate "com.google.protobuf", "org.apache.kudu.shaded.com.google.protobuf" + relocate "com.google.thirdparty", "org.apache.kudu.shaded.com.google.thirdparty" + relocate "com.sangupta", "org.apache.kudu.shaded.com.sangupta" + // Pulled in via osdetector. + relocate "kr.motd.maven", "org.apache.kudu.shaded.kr.motd.maven" + relocate "org.apache.http", "org.apache.kudu.shaded.org.apache.http" + relocate "org.apache.commons", "org.apache.kudu.shaded.org.apache.commons" + // Pulled in via Guava. + relocate "org.checkerframework", "org.apache.kudu.shaded.org.checkerframework" + relocate "org.hamcrest", "org.apache.kudu.shaded.org.hamcrest" + relocate "org.HdrHistogram", "org.apache.kudu.shaded.org.HdrHistogram" + // Pulled in via Micrometer. + relocate "org.LatencyUtils", "org.apache.kudu.shaded.org.LatencyUtils" + relocate "io.micrometer", "org.apache.kudu.shaded.io.micrometer" + relocate "io.netty", "org.apache.kudu.shaded.io.netty" + relocate "scopt", "org.apache.kudu.shaded.scopt" +} + +// ------------------------------------------------------------------ +// Everything below is a "hack" to support partial shading and +// accurate pom generation. At some point this logic should exist +// in the shadow plugin itself. +// https://github.com/johnrengelman/shadow/issues/166 +// https://github.com/johnrengelman/shadow/issues/159 +// ------------------------------------------------------------------ + +// Add a configuration to support unshaded compile dependencies. +// By default shadow assumes all dependencies are shaded. +configurations.create("compileUnshaded") +configurations.shadow.extendsFrom(configurations.compileUnshaded) +configurations.compile.extendsFrom(configurations.compileUnshaded) + +// We use afterEvaluate to add additional configuration once all the definitions +// in the projects build script have been applied +afterEvaluate { + // Ensure compileUnshaded dependencies are included in the pom. 
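+  // The mapping below assigns compileUnshaded dependencies the standard Maven "compile"
+  // scope, so the published pom still declares them even though they are not bundled
+  // into the shaded jar.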
+ [install, uploadArchives].each { task -> + task.repositories.each { + configure(it.pom.scopeMappings) { + // The priority value is arbitrary. + addMapping( + MavenPlugin.COMPILE_PRIORITY, + configurations.compileUnshaded, + Conf2ScopeMappingContainer.COMPILE) + } + } + } + + // Ensure we never shade SLF4J unless we explicitly specify it. + // This is a workaround because in the shadow plugin exclusions from + // parent modules are not respected in modules that use them. + if (!shadowIncludeSlf4j) { + shadowJar { + dependencies { + exclude(dependency("org.slf4j:slf4j-api:.*")) + } + } + } + + // Ensure compileUnshaded dependencies are not compiled into shadowJar. + project.configurations.compileUnshaded.dependencies.each { dep -> + def depStr = "${dep.group}:${dep.name}:${dep.version}" + logger.info "Excluding ${depStr} from being bundled into the shaded jar." + shadowJar { + dependencies { + exclude(dependency(depStr)) + } + } + } +} + +// Remove the shaded dependencies from the generated pom. +// This hack allows the project to support partially shaded jars, +// where the shadow plugin by default would remove all compile and runtime dependencies. +tasks.withType(Upload) { + def installer = install.repositories.mavenInstaller + def deployer = uploadArchives.repositories.mavenDeployer + + // Handle install and deploy in the same way. + [installer, deployer]*.pom*.whenConfigured { pom -> + def filter = shadowJar.getDependencyFilter() + def configs = shadowJar.getConfigurations() + + def shadowDependencies = configs.collectMany { + // Find all dependencies included in the shaded jar. + it.resolvedConfiguration.firstLevelModuleDependencies.findAll { + filter.isIncluded(it) + } + } + + // Remove the shaded dependencies from the pom. + shadowDependencies.each { shaded -> + def depStr = "${shaded.getModuleGroup()}:${shaded.getModuleName()}:${shaded.getModuleVersion()}" + logger.info "Excluding ${depStr} from the generated pom." + pom.dependencies.removeAll { dep -> + dep.groupId == shaded.getModuleGroup() && + dep.artifactId == shaded.getModuleName() && + dep.version == shaded.getModuleVersion() + } + } + + // Re-sort the generated maven dependencies to make pom comparisons easier. + pom.dependencies = pom.dependencies.sort { dep -> + "$dep.scope:$dep.optional:$dep.groupId:$dep.artifactId" + } + } +} \ No newline at end of file diff --git a/java-scala-spark4/gradle/tests.gradle b/java-scala-spark4/gradle/tests.gradle new file mode 100644 index 0000000000..4157c559ba --- /dev/null +++ b/java-scala-spark4/gradle/tests.gradle @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains common tasks and configuration for unit and integration tests. + +// Support parallel unit test execution. 
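+// For example (illustrative invocation), to run tests with four forked JVMs and test
+// output echoed to the console:
+//
+//   ./gradlew test -PmaxParallelForks=4 -PshowTestOutput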
+test { + maxParallelForks = propertyWithDefault("maxParallelForks", "1").toInteger() + if (propertyExists("showTestOutput")) { + // Show stdout and stderr from tests on the console. + testLogging.showStandardStreams = true + } +} + +tasks.withType(Test) { + // Log all test events. + testLogging { + events "passed", "skipped", "failed" + exceptionFormat = "full" + } + + // Break up the stdout and stderr logging by test case. + reports.junitXml.outputPerTestCase = true + + // Fork the jvm for every test class. + // This takes more time and resources but ensures isolation. + forkEvery 1 + + // Enable assertions during tests. + jvmArgs += "-enableassertions" + // Config 1g maximum java heap size for test JVMs. + jvmArgs += "-Xmx1g" + // Hide the Java 9+ warnings about illegal reflective access + if (JavaVersion.current().isJava9Compatible()) { + jvmArgs += "--illegal-access=permit" + def reflectionModules = [ + "java.base/java.io", // java.io reflection is used by Spark + "java.base/java.lang", // java.lang reflection is used by TestUtils + "java.base/java.net", // java.net reflection is used by FakeDNS + "java.base/java.nio", // java.nio reflection is used by Protobuf + "java.base/java.util", // java.util reflection is used by Spark + "java.base/java.util.concurrent", // java.util.concurrent reflection is used by Spark + "java.base/sun.nio.ch", // sun.nio.ch reflection is used by Netty + "java.security.jgss/sun.security.krb5" // sun.security.krb5 reflection is used by Hadoop's KerberosUtil + ] + reflectionModules.forEach { module -> + jvmArgs += "--add-opens=$module=ALL-UNNAMED" + } + } + // Enable paranoid Netty leak detection during tests. + // https://netty.io/wiki/reference-counted-objects.html#leak-detection-levels + if (propertyExists("nettyLeakDetection")) { + jvmArgs += "-Dio.netty.leakDetection.level=paranoid" + } + + // Set a few system properties. + systemProperty "java.awt.headless", true + systemProperty "java.net.preferIPv4Stack", true + systemProperty "java.security.egd", "file:/dev/urandom" // Improve RNG generation speed. + + if (propertyExists("securityDebug")) { + systemProperty "sun.security.krb5.debug", true + systemProperty "sun.security.spnego.debug", true + systemProperty "javax.net.debug", "all" + } + + // Only use the local KuduBinDir if we are not using the kudu-binary jar. + if (!propertyExists("useBinJar")) { + // Set kuduBinDir to the binaries to use with the MiniKuduCluster. + systemProperty "kuduBinDir", propertyWithDefault("kuduBinDir", "$project.rootDir/../build/latest/bin") + } + + // Set testRandomSeed to override the seed for the PRNG exposed in + // RandomUtils.java and used by various tests. This is useful when trying to + // reproduce a test failure for a test that uses the PRNG to generate state. + if (propertyExists("testRandomSeed")) { + systemProperty "testRandomSeed", propertyWithDefault("testRandomSeed", null) + } + + // Force the tests to be rerun if the rerunTests property is set. + if (propertyExists("rerunTests")) { + outputs.upToDateWhen { false } + } +} diff --git a/java-scala-spark4/gradle/wrapper.gradle b/java-scala-spark4/gradle/wrapper.gradle new file mode 100644 index 0000000000..fe268b7233 --- /dev/null +++ b/java-scala-spark4/gradle/wrapper.gradle @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains tasks for the gradle wrapper generation. + +// Ensure the wrapper script is generated based on the version defined in the project +// and not the version installed on the machine running the task. +// Read more about the wrapper here: https://docs.gradle.org/current/userguide/gradle_wrapper.html +wrapper { + gradleVersion = versions.gradle + distributionType = Wrapper.DistributionType.ALL + distributionUrl = "${gradleDistUrlBase}/gradle-${versions.gradle}-all.zip" +} + +// Custom task to inject support for downloading the gradle wrapper jar if it doesn't exist. +// This allows us to avoid checking in the jar to our repository. +// Additionally adds a license header to the wrapper while editing the file contents. +task bootstrapWrapper() { + // In the doLast block so this runs when the task is called and not during project configuration. + doLast { + def wrapperBasePath = "\$APP_HOME/gradle/wrapper" + def wrapperJarPath = wrapperBasePath + "/gradle-wrapper.jar" + + // Add a trailing zero to the version if needed. + def fullVersion = versions.gradle.count(".") == 1 ? "${versions.gradle}.0" : versions.gradle + def wrapperJarUrl = wrapperBaseUrl + "/gradle-wrapper.jar" + + def bootstrapString = """ + # Loop in case we encounter an error. + for attempt in 1 2 3; do + if [ ! -e $wrapperJarPath ]; then + if ! curl -s -S --retry 3 -L -o "$wrapperJarPath" "$wrapperJarUrl"; then + rm -f "$wrapperJarPath" + # Pause for a bit before looping in case the server throttled us. + sleep 5 + continue + fi + fi + done + """.stripIndent() + + def wrapperScript = wrapper.scriptFile + def wrapperLines = wrapperScript.readLines() + wrapperScript.withPrintWriter { out -> + def bootstrapWritten = false + wrapperLines.each { line -> + // Print the wrapper bootstrap before the first usage of the wrapper jar. + if (!bootstrapWritten && line.contains("gradle-wrapper.jar")) { + out.println(bootstrapString) + bootstrapWritten = true + } + out.println(line) + } + } + } +} +wrapper.finalizedBy bootstrapWrapper + +// Remove the generated batch file since we don't test building in the Windows environment. 
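+// Note: the `wrapper` task regenerates gradlew.bat, so this cleanup is re-applied each time
+// the wrapper is regenerated via the finalizedBy hook below.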
+task removeWindowScript(type: Delete) { + delete "$rootDir/gradlew.bat" +} +wrapper.finalizedBy removeWindowScript diff --git a/java-scala-spark4/gradle/wrapper/gradle-wrapper.properties b/java-scala-spark4/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000..8cf6eb5ad2 --- /dev/null +++ b/java-scala-spark4/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-all.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/java-scala-spark4/gradlew b/java-scala-spark4/gradlew new file mode 100644 index 0000000000..6e24fce06b --- /dev/null +++ b/java-scala-spark4/gradlew @@ -0,0 +1,198 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + + +# Loop in case we encounter an error. +for attempt in 1 2 3; do + if [ ! -e $APP_HOME/gradle/wrapper/gradle-wrapper.jar ]; then + if ! curl -s -S --retry 3 -L -o "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" "https://raw.githubusercontent.com/gradle/gradle/v6.8.3/gradle/wrapper/gradle-wrapper.jar"; then + rm -f "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" + # Pause for a bit before looping in case the server throttled us. + sleep 5 + continue + fi + fi +done + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! 
-x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/java-scala-spark4/kudu-backup-common/build.gradle b/java-scala-spark4/kudu-backup-common/build.gradle new file mode 100644 index 0000000000..56d8ad8acd --- /dev/null +++ b/java-scala-spark4/kudu-backup-common/build.gradle @@ -0,0 +1,60 @@ +// Licensed to the Apache 
Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+apply plugin: "scala"
+apply from: "$rootDir/gradle/protobuf.gradle"
+
+dependencies {
+  compile project(path: ":kudu-client", configuration: "shadow")
+  compile libs.protobufJava
+  compile (libs.protobufJavaUtil) {
+    // Make sure the wrong Guava version is not pulled in.
+    exclude group: "com.google.guava", module: "guava"
+  }
+  compile libs.slf4jApi
+
+  provided libs.hadoopCommon
+  provided libs.scalaLibrary
+
+  optional libs.yetusAnnotations
+
+  testCompile project(path: ":kudu-test-utils", configuration: "shadow")
+  testCompile libs.junit
+  testCompile libs.log4jApi
+  testCompile libs.log4jCore
+  testCompile libs.log4jSlf4jImpl
+  testCompile libs.scalatest
+}
+
+// Add protobuf files to the proto source set.
+sourceSets {
+  main {
+    proto {
+      srcDir "src/main/protobuf"
+    }
+  }
+}
+
+// kudu-backup-common has no public Javadoc.
+javadoc {
+  enabled = false
+}
+
+// Skip publishing the kudu-backup-common artifact because it will always be shaded into
+// kudu-backup and kudu-backup-tools.
+uploadArchives.enabled = false
+install.enabled = false
\ No newline at end of file
diff --git a/java-scala-spark4/kudu-backup-common/src/main/protobuf/backup.proto b/java-scala-spark4/kudu-backup-common/src/main/protobuf/backup.proto
new file mode 100644
index 0000000000..77a13b3e3a
--- /dev/null
+++ b/java-scala-spark4/kudu-backup-common/src/main/protobuf/backup.proto
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// Protobufs which are used in the Kudu backup process.
+// Though these are similar to the kudu common protobufs,
+// they are specific to the kudu backup application and
+// should be kept separate.
+syntax = "proto3";
+package kudu.backup;
+
+option java_package = "org.apache.kudu.backup";
+
+import "google/protobuf/wrappers.proto";
+
+// Maps to the ColumnTypeAttributes class.
+// The fields are effectively 1 to 1 mappings of those in ColumnTypeAttributes.
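+// (precision and scale apply to DECIMAL columns; length applies to VARCHAR columns.)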
+message ColumnTypeAttributesMetadataPB { + int32 precision = 1; + int32 scale = 2; + int32 length = 3; +} + +// Maps to the ColumnSchema class. +// The fields are effectively 1 to 1 mappings of those in ColumnSchema. +message ColumnMetadataPB { + string name = 1; + string type = 2; + ColumnTypeAttributesMetadataPB type_attributes = 3; + bool is_key = 4; + bool is_nullable = 5; + // Uses the StringValue wrapper so we can differentiate between "unset" + // and a default value. + google.protobuf.StringValue default_value = 6; + string encoding = 7; + string compression = 8; + int32 block_size = 9; + string comment = 10; + bool is_auto_incrementing = 11; +} + +// A human readable string representation of a column value for use +// in the RangeBoundsMetadataPB. +message ColumnValueMetadataPB { + string column_name = 1; + // This is a human-readable string value that can + // be parsed back into the appropriate type. + string value = 2; +} + +// These bounds are used in CreateTableOptions to +// generate valid range partition splits. +// They are encoded from a PartialRow and decoded to a PartialRow. +// The fields are repeated because each bound can have multiple column values. +// The number of values equals the number of columns in the range partition key, +// the column must exist in the schema and a column can not be repeated. +message RangeBoundsMetadataPB { + repeated ColumnValueMetadataPB lower_bounds = 1; + repeated ColumnValueMetadataPB upper_bounds = 2; +} + +// Maps to RangeSchema class. +// We add the bounds field so we can generate matching splits +// when restoring the tables. These splits are not currently +// included in the RangeSchema class, but are instead provided in +// the CreateTableOptions. +message RangePartitionMetadataPB { + repeated string column_names = 1; + repeated RangeBoundsMetadataPB bounds = 2; +} + +// Maps to HashBucketSchema class. +// The fields are effectively 1 to 1 mappings of those in HashBucketSchema. +message HashPartitionMetadataPB { + repeated string column_names = 1; + int32 num_buckets = 2; + int32 seed = 3; +} + +// Maps to RangeWithHashSchema class. +// The fields are effectively 1 to 1 mappings of those in RangeWithHashSchema. +message RangeAndHashPartitionMetadataPB { + RangeBoundsMetadataPB bounds = 1; + repeated HashPartitionMetadataPB hash_partitions = 2; +} + +// Maps to PartitionSchema class. +// The fields are effectively 1 to 1 mappings of those in PartitionSchema. +message PartitionSchemaMetadataPB { + repeated HashPartitionMetadataPB hash_partitions = 1; + RangePartitionMetadataPB range_partitions = 2; + repeated RangeAndHashPartitionMetadataPB range_and_hash_partitions = 3; +} + +// Maps to Partition class. +// The fields are effectively 1 to 1 mappings of those in Partition. +message PartitionMetadataPB { + bytes partition_key_start = 1; + bytes partition_key_end = 2; + repeated int32 hash_buckets = 3; +} + +// Represents the metadata of a table backup. This metadata is output +// so we can understand and create a table that matches the backed up +// table on restore. +message TableMetadataPB { + // A version used to handle any future format/layout changes. + int32 version = 1; + // The starting point of a backup. A UNIX timestamp in milliseconds since the epoch. + // If from_ms is 0, this is a full backup. + int64 from_ms = 2; + // The end point of a backup. A UNIX timestamp in milliseconds since the epoch. + int64 to_ms = 3; + // The file format used to store the data. + string data_format = 4; + // The name of the table. 
+  string table_name = 5;
+  // The internal id of the table.
+  // This is useful for detecting dropped and added tables.
+  string table_id = 6;
+  // The replication factor of this table.
+  int32 num_replicas = 7;
+  // The metadata for the table's columns.
+  repeated ColumnMetadataPB columns = 8;
+  // A map of column name to internal column id.
+  // This is for validation only and not used when creating the restored table.
+  // This is useful for detecting dropped and added columns.
+  map<string, int32> column_ids = 9;
+  // The metadata for the table's partition schema.
+  PartitionSchemaMetadataPB partitions = 10;
+  // A map of tablet ID to the partition start key.
+  // This is for validation only and not used when creating the restored table.
+  // This is useful for detecting dropped and added partitions.
+  map<string, PartitionMetadataPB> tablets = 11;
+  // The owner of the table.
+  string table_owner = 12;
+  // The comment on the table.
+  string table_comment = 13;
+}
\ No newline at end of file
diff --git a/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/BackupGraph.scala b/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/BackupGraph.scala
new file mode 100644
index 0000000000..344944eefc
--- /dev/null
+++ b/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/BackupGraph.scala
@@ -0,0 +1,218 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.kudu.backup
+
+import org.apache.hadoop.fs.Path
+import org.apache.kudu.backup.Backup.TableMetadataPB
+import org.apache.yetus.audience.InterfaceAudience
+import org.apache.yetus.audience.InterfaceStability
+
+import scala.collection.mutable
+
+/**
+ * A directed weighted graph of backups used to pick the optimal series of backups and restores.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+class BackupGraph(val tableId: String) {
+  // Index of backup.fromMs -> backup for use in chaining backups together.
+  private val adjacencyList = mutable.Map[Long, mutable.ListBuffer[BackupNode]]()
+
+  // A full backup has a fromMs of 0.
+  private val FullBackupFromMs = 0
+
+  /**
+   * Add a backup to the graph.
+   * @param backup the backup to add.
+   */
+  def addBackup(backup: BackupNode): Unit = {
+    // Add a weighted edge with the backup.
+    addEdge(backup)
+  }
+
+  private def addEdge(backup: BackupNode): Unit = {
+    val adjacentVertices =
+      adjacencyList.getOrElse(backup.metadata.getFromMs, mutable.ListBuffer[BackupNode]())
+    adjacentVertices += backup
+    adjacencyList.put(backup.metadata.getFromMs, adjacentVertices)
+  }
+
+  /**
+   * @return true if the graph has a full backup.
+   */
+  def hasFullBackup: Boolean = fullBackups.nonEmpty
+
+  /**
+   * @return all the full backups in the graph.
+   */
+  def fullBackups: Seq[BackupNode] = {
+    adjacencyList.getOrElse(FullBackupFromMs, Seq()).toSeq
+  }
+
+  /**
+   * @return all the backups in the graph.
+   */
+  def allBackups: Seq[BackupNode] = {
+    adjacencyList.values.flatten.toSeq
+  }
+
+  /**
+   * @return the most recent full backup.
+   * @throws IllegalStateException if no full backup exists.
+   */
+  def mostRecentFull: BackupNode = {
+    if (!hasFullBackup) throw new IllegalStateException("No full backup exists")
+    fullBackups.maxBy(_.metadata.getToMs)
+  }
+
+  /**
+   * @return all backup paths in the graph.
+   */
+  def backupPaths: Seq[BackupPath] = {
+    allPaths(FullBackupFromMs, List())
+      .map(BackupPath)
+      .filterNot(_.backups.isEmpty) // Remove empty paths.
+  }
+
+  private def allPaths(fromMs: Long, path: List[BackupNode]): List[List[BackupNode]] = {
+    if (!adjacencyList.contains(fromMs)) {
+      List(path)
+    } else {
+      adjacencyList(fromMs).flatMap { node =>
+        allPaths(node.metadata.getToMs, path ++ List(node))
+      }.toList
+    }
+  }
+
+  /**
+   * Returns the backup that should be used as the base for the next backup.
+   *
+   * The logic for picking this backup is as follows:
+   *
+   * 1. Pick the paths with the most recent full backup.
+   * 2. If there are multiple paths, pick the path with the most recent partial backup.
+   * 3. If there are multiple paths, pick the path with the lowest weight.
+   *
+   * This allows concurrent full backups to be taken (or generated by compaction)
+   * while also taking incremental backups.
+   *
+   * While a full backup is running, incremental backups will continue to build
+   * off the chain from the previous full. When the new full completes, the
+   * next incremental backup will use that full as its "current" chain.
+   *
+   * @throws IllegalStateException if no full backup exists.
+   */
+  def backupBase: BackupNode = {
+    // 1. Pick the paths with the most recent full backup.
+    val recentFulls = backupPaths.filter(_.fullBackup == mostRecentFull)
+
+    // 2. If there are multiple paths, pick the path with the most recent partial backup.
+    val maxToMs = recentFulls.maxBy(_.toMs).toMs
+    val recentPaths = recentFulls.filter(_.toMs == maxToMs)
+
+    // 3. If there are multiple paths, pick the path with the lowest weight.
+    recentPaths.minBy(_.weight).lastBackup
+  }
+
+  /**
+   * Returns a sequence of backups that should be used to restore.
+   *
+   * The logic for picking this path is as follows:
+   *
+   * 1. Pick the path with the most recent backup.
+   * 2. If there are multiple paths, pick the path with the lowest weight.
+   *
+   * This ensures we always restore the most current state of the data while
+   * also picking the most efficient path (likely a result of compaction).
+   *
+   * @throws IllegalStateException if no full backup exists.
+   */
+  def restorePath: BackupPath = {
+    if (backupPaths.isEmpty) {
+      throw new RuntimeException(s"No valid backups found for table ID: $tableId")
+    }
+
+    // 1. Pick the path with the most recent backup.
+    val maxToMs = backupPaths.maxBy(_.toMs).toMs
+    val recentPaths = backupPaths.filter(_.toMs == maxToMs)
+
+    // 2. If there are multiple paths, pick the path with the lowest weight.
+    recentPaths.minBy(_.weight)
+  }
+
+  /**
+   * Returns a new BackupGraph that represents the graph including only nodes with a toMs equal
+   * to or less than the specified time.
+   * @param timeMs the time to filter by.
+ * @return + */ + def filterByTime(timeMs: Long): BackupGraph = { + val result = new BackupGraph(tableId) + val distinctBackups = adjacencyList.values.flatten.toSet + distinctBackups.filter(_.metadata.getToMs <= timeMs).foreach(result.addBackup) + result + } +} + +/** + * Node class to represent nodes in the backup graph. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +case class BackupNode(path: Path, metadata: TableMetadataPB) { + + /** + * @return The weight/cost of this Node. + */ + def weight: Int = { + // Full backups have a weight of 0 and partial backups have a weight of 1. + if (metadata.getFromMs == 0) 0 else 1 + } +} + +/** + * A backup path is a full backup with a series of incremental backups. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +case class BackupPath(backups: Seq[BackupNode]) { + + def fullBackup: BackupNode = backups.head + + def lastBackup: BackupNode = backups.last + + /** + * @return the tableName for the entire path. + */ + def tableName: String = backups.last.metadata.getTableName + + /** + * @return the toMs for the entire path. + */ + def toMs: Long = backups.last.metadata.getToMs + + /** + * @return the weight/cost of the path. + */ + def weight: Int = backups.map(_.weight).sum + + /** + * @return A string useful for debugging the path. + */ + def pathString: String = backups.map(_.metadata.getFromMs).mkString(" -> ") +} diff --git a/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/BackupIO.scala b/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/BackupIO.scala new file mode 100644 index 0000000000..a079afde1f --- /dev/null +++ b/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/BackupIO.scala @@ -0,0 +1,259 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import java.io.InputStreamReader +import java.net.URLEncoder +import java.nio.charset.StandardCharsets + +import com.google.common.io.CharStreams +import com.google.protobuf.util.JsonFormat +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.LocatedFileStatus +import org.apache.hadoop.fs.Path +import org.apache.kudu.backup.Backup.TableMetadataPB +import org.apache.kudu.backup.BackupIO._ +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +import scala.collection.mutable + +/** + * A class to encapsulate and centralize the logic for data layout and IO + * of metadata and data of the backup and restore jobs. 
+ * + * The default backup directory structure is: + * //-// + * .kudu-metadata.json + * part-*.parquet + * + * - rootPath: can be used to distinguish separate backup groups, jobs, or concerns. + * - tableId: the unique internal ID of the table being backed up. + * - tableName: the name of the table being backed up. + * - backup-id: A way to uniquely identify/group the data for a single backup run. + * - Currently the `toMs` time for the job. + * - .kudu-metadata.json: Contains all of the metadata to support recreating the table, + * linking backups by time, and handling data format changes. + * - Written last so that failed backups will not have a metadata file and will not be + * considered at restore time or backup linking time. + * - part-*.parquet: The data files containing the tables data. + * - Incremental backups contain an additional “RowAction” byte column at the end. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class BackupIO(val conf: Configuration, rootPathStr: String) { + val log: Logger = LoggerFactory.getLogger(getClass) + + val rootPath: Path = new Path(rootPathStr) + val fs: FileSystem = rootPath.getFileSystem(conf) + + /** + * Return the path to the table directory. + */ + def tablePath(tableId: String, tableName: String): Path = { + val encodedTableName = URLEncoder.encode(tableName, "UTF-8") + val dirName = s"$tableId-$encodedTableName" + new Path(rootPath, dirName) + } + + /** + * Return the backup path for a table and time. + */ + def backupPath(tableId: String, tableName: String, timestampMs: Long): Path = { + new Path(tablePath(tableId, tableName), timestampMs.toString) + } + + /** + * Return the path to the metadata file within a backup path. + */ + def backupMetadataPath(backupPath: Path): Path = { + new Path(backupPath, MetadataFileName) + } + + /** + * Serializes the table metadata to Json and writes it to the metadata path. + */ + def writeTableMetadata(tableMetadata: TableMetadataPB, metadataPath: Path): Unit = { + log.info(s"Writing metadata to $metadataPath") + val out = fs.create(metadataPath, /* overwrite= */ false) + val json = JsonFormat.printer().print(tableMetadata) + out.write(json.getBytes(StandardCharsets.UTF_8)) + out.flush() + out.close() + } + + /** + * Deletes the backup. + */ + def deleteBackup(metadata: TableMetadataPB): Unit = { + fs.delete(backupPath(metadata.getTableId, metadata.getTableName, metadata.getToMs), true) + } + + /** + * Reads all of the backup graphs. + */ + def readAllBackupGraphs(): Seq[BackupGraph] = { + buildBackupGraphs(listAllTableDirs(), System.currentTimeMillis()) + } + + /** + * Reads all of the backup graphs for a given list of table names and a time filter. + */ + def readBackupGraphsByTableName( + tableNames: Seq[String], + timeMs: Long = System.currentTimeMillis()): Seq[BackupGraph] = { + // We also need to include the metadata from old table names. + // To handle this we list all directories, get the IDs for the tableNames, + // and then filter the directories by those IDs. 
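+    // A renamed table keeps its table ID, so matching on IDs also picks up backups taken
+    // under the table's previous names.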
+ val allDirs = listAllTableDirs() + val encodedNames = tableNames.map(URLEncoder.encode(_, "UTF-8")).toSet + val tableIds = + allDirs.flatMap { dir => + val dirName = dir.getName + val tableName = tableNameFromDirName(dirName) + if (encodedNames.contains(tableName)) { + Some(tableIdFromDirName(dirName)) + } else { + None + } + }.toSet + val dirs = allDirs.filter(dir => tableIds.contains(tableIdFromDirName(dir.getName))) + buildBackupGraphs(dirs, timeMs) + } + + /** + * Reads all of the backup graphs for a given list of table IDs and a time filter. + */ + def readBackupGraphsByTableId( + tableIds: Seq[String], + timeMs: Long = System.currentTimeMillis()): Seq[BackupGraph] = { + val dirs = listTableIdDirs(tableIds) + buildBackupGraphs(dirs, timeMs) + } + + /** + * Builds all of the backup graphs for a given list of directories by reading all of the + * metadata files and inserting them into a backup graph for each table id. + * See [[BackupGraph]] for more details. + */ + private def buildBackupGraphs(dirs: Seq[Path], timeMs: Long): Seq[BackupGraph] = { + // Read all the metadata and filter by timesMs. + val metadata = dirs.flatMap(readTableBackups).filter(_._2.getToMs <= timeMs) + // Group the metadata by the table ID and create a BackupGraph for each table ID. + metadata + .groupBy(_._2.getTableId) + .map { + case (tableId, pm) => + val graph = new BackupGraph(tableId) + pm.foreach { + case (path, metadata) => + graph.addBackup(BackupNode(path, metadata)) + } + graph + } + .toList + } + + /** + * Return all of the table directories. + */ + private def listAllTableDirs(): Seq[Path] = { + listMatching(_ => true) + } + + /** + * Return the table directories for a given list of table IDs. + */ + private def listTableIdDirs(tableIds: Seq[String]): Seq[Path] = { + val idSet = tableIds.toSet + listMatching { file => + val name = file.getPath.getName + file.isDirectory && idSet.contains(tableIdFromDirName(name)) + } + } + + private def tableIdFromDirName(dirName: String): String = { + // Split to the left of "-" and keep the first half to get the table ID. + dirName.splitAt(dirName.indexOf("-"))._1 + } + + private def tableNameFromDirName(dirName: String): String = { + // Split to the right of "-" and keep the second half to get the table name. + dirName.splitAt(dirName.indexOf("-") + 1)._2 + } + + /** + * List all the files in the root directory and return the files that match + * according to the passed function. + */ + private def listMatching(fn: LocatedFileStatus => Boolean): Seq[Path] = { + val results = new mutable.ListBuffer[Path]() + if (fs.exists(rootPath)) { + val iter = fs.listLocatedStatus(rootPath) + while (iter.hasNext) { + val file = iter.next() + if (fn(file)) { + results += file.getPath + } + } + } + results.toSeq + } + + /** + * Reads and returns all of the metadata for a given table directory. + */ + private def readTableBackups(tableDir: Path): Seq[(Path, TableMetadataPB)] = { + val results = new mutable.ListBuffer[(Path, TableMetadataPB)]() + val files = fs.listStatus(tableDir) + files.foreach { file => + if (file.isDirectory) { + val metadataPath = new Path(file.getPath, MetadataFileName) + if (fs.exists(metadataPath)) { + val metadata = readTableMetadata(metadataPath) + results += ((file.getPath, metadata)) + } + } + } + log.info(s"Found ${results.size} paths in ${tableDir.toString}") + results.toList + } + + /** + * Reads and deserializes the metadata file at the given path. 
+ */ + def readTableMetadata(metadataPath: Path): TableMetadataPB = { + val in = new InputStreamReader(fs.open(metadataPath), StandardCharsets.UTF_8) + try { + val json = CharStreams.toString(in) + val builder = TableMetadataPB.newBuilder() + JsonFormat.parser().merge(json, builder) + builder.build() + } finally { + in.close() + } + } +} + +object BackupIO { + // The name of the metadata file within a backup directory. + val MetadataFileName = ".kudu-metadata.json" +} diff --git a/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/TableMetadata.scala b/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/TableMetadata.scala new file mode 100644 index 0000000000..74f944e8cd --- /dev/null +++ b/java-scala-spark4/kudu-backup-common/src/main/scala/org/apache/kudu/backup/TableMetadata.scala @@ -0,0 +1,450 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import java.math.BigDecimal +import java.sql.Date +import java.util + +import com.google.protobuf.ByteString +import com.google.protobuf.StringValue +import org.apache.commons.net.util.Base64 +import org.apache.kudu.backup.Backup._ +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder +import org.apache.kudu.ColumnSchema.CompressionAlgorithm +import org.apache.kudu.ColumnSchema.Encoding +import org.apache.kudu.ColumnTypeAttributes.ColumnTypeAttributesBuilder +import org.apache.kudu.client.CreateTableOptions +import org.apache.kudu.client.KuduTable +import org.apache.kudu.client.PartialRow +import org.apache.kudu.client.PartitionSchema +import org.apache.kudu.ColumnSchema +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.client.KuduPartitioner.KuduPartitionerBuilder +import org.apache.kudu.client.PartitionSchema.HashBucketSchema +import org.apache.kudu.client.PartitionSchema.RangeSchema +import org.apache.kudu.client.PartitionSchema.RangeWithHashSchema +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability + +import scala.collection.JavaConverters._ + +@InterfaceAudience.Private +@InterfaceStability.Unstable +object TableMetadata { + + val MetadataFileName = ".kudu-metadata.json" + val MetadataVersion = 1 + + def getTableMetadata( + table: KuduTable, + fromMs: Long, + toMs: Long, + format: String): TableMetadataPB = { + val columnIds = new util.HashMap[String, Integer]() + val columns = table.getSchema.getColumns.asScala.map { col => + columnIds.put(col.getName, table.getSchema.getColumnId(col.getName)) + val builder = ColumnMetadataPB + .newBuilder() + .setName(col.getName) + .setType(col.getType.name()) + .setIsKey(col.isKey) + .setIsNullable(col.isNullable) + .setEncoding(col.getEncoding.toString) + 
.setCompression(col.getCompressionAlgorithm.toString) + .setBlockSize(col.getDesiredBlockSize) + .setComment(col.getComment) + .setIsAutoIncrementing(col.isAutoIncrementing) + if (col.getTypeAttributes != null) { + builder.setTypeAttributes(getTypeAttributesMetadata(col)) + } + if (col.getDefaultValue != null) { + builder.setDefaultValue(StringValue.of(valueToString(col.getDefaultValue, col.getType))) + } + builder.build() + } + + val partitioner = new KuduPartitionerBuilder(table).build() + val tablets = partitioner.getTabletMap.asScala.map { + case (id, partition) => + val metadata = PartitionMetadataPB + .newBuilder() + .setPartitionKeyStart(ByteString.copyFrom(partition.getPartitionKeyStart)) + .setPartitionKeyEnd(ByteString.copyFrom(partition.getPartitionKeyStart)) + .addAllHashBuckets(partition.getHashBuckets) + .build() + (id, metadata) + } + + val builder = TableMetadataPB + .newBuilder() + .setVersion(MetadataVersion) + .setFromMs(fromMs) + .setToMs(toMs) + .setDataFormat(format) + .setTableName(table.getName) + .setTableId(table.getTableId) + .addAllColumns(columns.asJava) + .putAllColumnIds(columnIds) + .setNumReplicas(table.getNumReplicas) + .setPartitions(getPartitionSchemaMetadata(table)) + .putAllTablets(tablets.asJava) + .setTableOwner(table.getOwner) + .setTableComment(table.getComment) + builder.build() + } + + private def getTypeAttributesMetadata(col: ColumnSchema): ColumnTypeAttributesMetadataPB = { + val attributes = col.getTypeAttributes + ColumnTypeAttributesMetadataPB + .newBuilder() + .setPrecision(attributes.getPrecision) + .setScale(attributes.getScale) + .setLength(attributes.getLength) + .build() + } + + private def getPartitionSchemaMetadata(table: KuduTable): PartitionSchemaMetadataPB = { + val hashPartitions = getHashPartitionsMetadata(table) + val rangePartitions = getRangePartitionMetadata(table) + val rangeAndHashPartitions = getRangeAndHashPartitionsMetadata(table) + PartitionSchemaMetadataPB + .newBuilder() + .addAllHashPartitions(hashPartitions.asJava) + .setRangePartitions(rangePartitions) + .addAllRangeAndHashPartitions(rangeAndHashPartitions.asJava) + .build() + } + + private def getRangeAndHashPartitionsMetadata( + table: KuduTable): Seq[RangeAndHashPartitionMetadataPB] = { + val tableSchema = table.getSchema + val partitionSchema = table.getPartitionSchema + val rangeColumnNames = partitionSchema.getRangeSchema.getColumnIds.asScala.map { id => + getColumnById(tableSchema, id).getName + } + partitionSchema.getRangesWithHashSchemas.asScala.map { rhs => + val hashSchemas = rhs.hashSchemas.asScala.map { hs => + val hashColumnNames = hs.getColumnIds.asScala.map { id => + getColumnById(tableSchema, id).getName + } + HashPartitionMetadataPB + .newBuilder() + .addAllColumnNames(hashColumnNames.asJava) + .setNumBuckets(hs.getNumBuckets) + .setSeed(hs.getSeed) + .build() + } + val upperValues = getBoundValues(rhs.upperBound, rangeColumnNames.toSeq, tableSchema) + val lowerValues = getBoundValues(rhs.lowerBound, rangeColumnNames.toSeq, tableSchema) + val bounds = RangeBoundsMetadataPB + .newBuilder() + .addAllUpperBounds(upperValues.asJava) + .addAllLowerBounds(lowerValues.asJava) + .build() + RangeAndHashPartitionMetadataPB + .newBuilder() + .setBounds(bounds) + .addAllHashPartitions(hashSchemas.asJava) + .build() + }.toSeq + } + + private def getHashPartitionsMetadata(table: KuduTable): Seq[HashPartitionMetadataPB] = { + val tableSchema = table.getSchema + val partitionSchema = table.getPartitionSchema + partitionSchema.getHashBucketSchemas.asScala.map { 
hs => + val columnNames = hs.getColumnIds.asScala.map { id => + getColumnById(tableSchema, id).getName + } + HashPartitionMetadataPB + .newBuilder() + .addAllColumnNames(columnNames.asJava) + .setNumBuckets(hs.getNumBuckets) + .setSeed(hs.getSeed) + .build() + }.toSeq + } + + private def getRangePartitionMetadata(table: KuduTable): RangePartitionMetadataPB = { + val tableSchema = table.getSchema + val partitionSchema = table.getPartitionSchema + val columnNames = partitionSchema.getRangeSchema.getColumnIds.asScala.map { id => + getColumnById(tableSchema, id).getName + } + + val bounds = table + .getRangePartitionsWithTableHashSchema(table.getAsyncClient.getDefaultOperationTimeoutMs) + .asScala + .map { p => + val lowerValues = + getBoundValues(p.getDecodedRangeKeyStart(table), columnNames.toSeq, tableSchema) + val upperValues = + getBoundValues(p.getDecodedRangeKeyEnd(table), columnNames.toSeq, tableSchema) + RangeBoundsMetadataPB + .newBuilder() + .addAllUpperBounds(upperValues.asJava) + .addAllLowerBounds(lowerValues.asJava) + .build() + } + RangePartitionMetadataPB + .newBuilder() + .addAllColumnNames(columnNames.asJava) + .addAllBounds(bounds.asJava) + .build() + } + + private def getColumnById(schema: Schema, colId: Int): ColumnSchema = { + schema.getColumnByIndex(schema.getColumnIndex(colId)) + } + + private def getBoundValues( + bound: PartialRow, + columnNames: Seq[String], + schema: Schema): Seq[ColumnValueMetadataPB] = { + columnNames.filter(bound.isSet).map { col => + val colType = schema.getColumn(col).getType + val value = getValue(bound, col, colType) + ColumnValueMetadataPB + .newBuilder() + .setColumnName(col) + .setValue(valueToString(value, colType)) + .build() + } + } + + private def getPartialRow(values: Seq[ColumnValueMetadataPB], schema: Schema): PartialRow = { + val row = schema.newPartialRow() + values.foreach { v => + val colType = schema.getColumn(v.getColumnName).getType + addValue(valueFromString(v.getValue, colType), row, v.getColumnName, colType) + } + row + } + + def getKuduSchema(metadata: TableMetadataPB): Schema = { + var IsAutoIncrementingPresent = false + metadata.getColumnsList.asScala.foreach { col => + if (col.getIsAutoIncrementing) { + IsAutoIncrementingPresent = true + } + } + val columns = new util.ArrayList[ColumnSchema]() + val colIds = new util.ArrayList[Integer]() + val toId = metadata.getColumnIdsMap.asScala + metadata.getColumnsList.asScala.foreach { col => + if (!col.getIsAutoIncrementing) { + val colType = Type.getTypeForName(col.getType) + val builder = new ColumnSchemaBuilder(col.getName, colType) + .nullable(col.getIsNullable) + .encoding(Encoding.valueOf(col.getEncoding)) + .compressionAlgorithm(CompressionAlgorithm.valueOf(col.getCompression)) + .desiredBlockSize(col.getBlockSize) + .comment(col.getComment) + if (IsAutoIncrementingPresent) { + builder.nonUniqueKey(col.getIsKey) + } else { + builder.key(col.getIsKey) + } + + if (col.hasDefaultValue) { + val value = valueFromString(col.getDefaultValue.getValue, colType) + builder.defaultValue(value) + } + + if (col.hasTypeAttributes) { + val attributes = col.getTypeAttributes + builder.typeAttributes( + new ColumnTypeAttributesBuilder() + .precision(attributes.getPrecision) + .scale(attributes.getScale) + .length(attributes.getLength) + .build() + ) + } + colIds.add(toId(col.getName)) + columns.add(builder.build()) + } + } + new Schema(columns, colIds) + } + + private def getValue(row: PartialRow, columnName: String, colType: Type): Any = { + colType match { + case Type.BOOL => 
row.getBoolean(columnName) + case Type.INT8 => row.getByte(columnName) + case Type.INT16 => row.getShort(columnName) + case Type.INT32 => row.getInt(columnName) + case Type.INT64 | Type.UNIXTIME_MICROS => row.getLong(columnName) + case Type.FLOAT => row.getFloat(columnName) + case Type.DOUBLE => row.getDouble(columnName) + case Type.VARCHAR => row.getVarchar(columnName) + case Type.STRING => row.getString(columnName) + case Type.BINARY => row.getBinary(columnName) + case Type.DECIMAL => row.getDecimal(columnName) + case Type.DATE => row.getDate(columnName) + case _ => + throw new IllegalArgumentException(s"Unsupported column type: $colType") + } + } + + private def addValue(value: Any, row: PartialRow, columnName: String, colType: Type): Any = { + colType match { + case Type.BOOL => row.addBoolean(columnName, value.asInstanceOf[Boolean]) + case Type.INT8 => row.addByte(columnName, value.asInstanceOf[Byte]) + case Type.INT16 => row.addShort(columnName, value.asInstanceOf[Short]) + case Type.INT32 => row.addInt(columnName, value.asInstanceOf[Int]) + case Type.INT64 | Type.UNIXTIME_MICROS => + row.addLong(columnName, value.asInstanceOf[Long]) + case Type.FLOAT => row.addFloat(columnName, value.asInstanceOf[Float]) + case Type.DOUBLE => row.addDouble(columnName, value.asInstanceOf[Double]) + case Type.VARCHAR => row.addVarchar(columnName, value.asInstanceOf[String]) + case Type.STRING => row.addString(columnName, value.asInstanceOf[String]) + case Type.BINARY => + row.addBinary(columnName, value.asInstanceOf[Array[Byte]]) + case Type.DECIMAL => + row.addDecimal(columnName, value.asInstanceOf[BigDecimal]) + case Type.DATE => row.addDate(columnName, value.asInstanceOf[Date]) + case _ => + throw new IllegalArgumentException(s"Unsupported column type: $colType") + } + } + + /** + * Returns the string value of serialized value according to the type of column. 
+ */ + private def valueToString(value: Any, colType: Type): String = { + colType match { + case Type.BOOL => + String.valueOf(value.asInstanceOf[Boolean]) + case Type.INT8 => + String.valueOf(value.asInstanceOf[Byte]) + case Type.INT16 => + String.valueOf(value.asInstanceOf[Short]) + case Type.INT32 => + String.valueOf(value.asInstanceOf[Int]) + case Type.INT64 | Type.UNIXTIME_MICROS => + String.valueOf(value.asInstanceOf[Long]) + case Type.FLOAT => + String.valueOf(value.asInstanceOf[Float]) + case Type.DOUBLE => + String.valueOf(value.asInstanceOf[Double]) + case Type.VARCHAR => + value.asInstanceOf[String] + case Type.STRING => + value.asInstanceOf[String] + case Type.BINARY => + Base64.encodeBase64String(value.asInstanceOf[Array[Byte]]) + case Type.DECIMAL => + value + .asInstanceOf[BigDecimal] + .toString // TODO: Explicitly control print format + case Type.DATE => + value.asInstanceOf[Date].toString() + case _ => + throw new IllegalArgumentException(s"Unsupported column type: $colType") + } + } + + private def valueFromString(value: String, colType: Type): Any = { + colType match { + case Type.BOOL => value.toBoolean + case Type.INT8 => value.toByte + case Type.INT16 => value.toShort + case Type.INT32 => value.toInt + case Type.INT64 | Type.UNIXTIME_MICROS => value.toLong + case Type.FLOAT => value.toFloat + case Type.DOUBLE => value.toDouble + case Type.VARCHAR => value + case Type.STRING => value + case Type.BINARY => Base64.decodeBase64(value) + case Type.DECIMAL => new BigDecimal(value) // TODO: Explicitly pass scale + case Type.DATE => Date.valueOf(value) + case _ => + throw new IllegalArgumentException(s"Unsupported column type: $colType") + } + } + + def getCreateTableOptionsWithoutRangePartitions( + metadata: TableMetadataPB, + restoreOwner: Boolean): CreateTableOptions = { + val options = new CreateTableOptions() + if (restoreOwner) { + options.setOwner(metadata.getTableOwner) + } + options.setComment(metadata.getTableComment) + options.setNumReplicas(metadata.getNumReplicas) + metadata.getPartitions.getHashPartitionsList.asScala.foreach { hp => + options + .addHashPartitions(hp.getColumnNamesList, hp.getNumBuckets, hp.getSeed) + } + val rangePartitionColumns = + metadata.getPartitions.getRangePartitions.getColumnNamesList + options.setRangePartitionColumns(rangePartitionColumns) + options + } + + def getRangeBoundPartialRows(metadata: TableMetadataPB): Seq[(PartialRow, PartialRow)] = { + val schema = getKuduSchema(metadata) + metadata.getPartitions.getRangePartitions.getBoundsList.asScala.map { b => + val lower = getPartialRow(b.getLowerBoundsList.asScala.toSeq, schema) + val upper = getPartialRow(b.getUpperBoundsList.asScala.toSeq, schema) + (lower, upper) + }.toSeq + } + + def getRangeBoundsPartialRowsWithHashSchemas( + metadata: TableMetadataPB): Seq[RangeWithHashSchema] = { + val schema = getKuduSchema(metadata) + metadata.getPartitions.getRangeAndHashPartitionsList.asScala.map { rhp => + val hashSchemas = rhp.getHashPartitionsList.asScala.map { hp => + val colIds = hp.getColumnNamesList.asScala.map { name => + new Integer(schema.getColumnIndex(schema.getColumnId(name))) + } + new HashBucketSchema(colIds.asJava, hp.getNumBuckets, hp.getSeed) + } + val lower = getPartialRow(rhp.getBounds.getLowerBoundsList.asScala.toSeq, schema) + val upper = getPartialRow(rhp.getBounds.getUpperBoundsList.asScala.toSeq, schema) + new RangeWithHashSchema(lower, upper, hashSchemas.asJava) + }.toSeq + } + + def getPartitionSchema(metadata: TableMetadataPB): PartitionSchema = { + val 
colNameToId = metadata.getColumnIdsMap.asScala + val schema = getKuduSchema(metadata) + val rangeIds = + metadata.getPartitions.getRangePartitions.getColumnNamesList.asScala.map(colNameToId) + val rangeSchema = new RangeSchema(rangeIds.asJava) + val hashSchemas = metadata.getPartitions.getHashPartitionsList.asScala.map { hp => + val colIds = hp.getColumnNamesList.asScala.map(colNameToId) + new HashBucketSchema(colIds.asJava, hp.getNumBuckets, hp.getSeed) + } + val rangesWithHashSchemas = + metadata.getPartitions.getRangeAndHashPartitionsList.asScala.map { rhp => + val rangeHashSchemas = rhp.getHashPartitionsList.asScala.map { hp => + val colIds = hp.getColumnNamesList.asScala.map(colNameToId) + new HashBucketSchema(colIds.asJava, hp.getNumBuckets, hp.getSeed) + } + val lower = getPartialRow(rhp.getBounds.getLowerBoundsList.asScala.toSeq, schema) + val upper = getPartialRow(rhp.getBounds.getUpperBoundsList.asScala.toSeq, schema) + new RangeWithHashSchema(lower, upper, rangeHashSchemas.asJava) + } + new PartitionSchema(rangeSchema, hashSchemas.asJava, rangesWithHashSchemas.asJava, schema) + } +} diff --git a/java-scala-spark4/kudu-backup-common/src/test/resources/log4j2.properties b/java-scala-spark4/kudu-backup-common/src/test/resources/log4j2.properties new file mode 100644 index 0000000000..e85deec999 --- /dev/null +++ b/java-scala-spark4/kudu-backup-common/src/test/resources/log4j2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT + +logger.kudu.name = org.apache.kudu +logger.kudu.level = debug \ No newline at end of file diff --git a/java-scala-spark4/kudu-backup-common/src/test/scala/org/apache/kudu/backup/TestBackupGraph.scala b/java-scala-spark4/kudu-backup-common/src/test/scala/org/apache/kudu/backup/TestBackupGraph.scala new file mode 100644 index 0000000000..ff4bd9321e --- /dev/null +++ b/java-scala-spark4/kudu-backup-common/src/test/scala/org/apache/kudu/backup/TestBackupGraph.scala @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import com.google.common.collect.ImmutableList +import org.apache.kudu.client.CreateTableOptions +import org.apache.kudu.client.KuduTable +import org.apache.kudu.test.ClientTestUtil.getBasicSchema +import org.apache.kudu.test.KuduTestHarness +import org.junit.Assert._ +import org.junit.Before +import org.junit.Rule +import org.junit.Test +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +import scala.annotation.meta.getter + +class TestBackupGraph { + val log: Logger = LoggerFactory.getLogger(getClass) + + var tableName: String = "TestBackupGraph" + var table: KuduTable = _ + + @(Rule @getter) + val harness = new KuduTestHarness + + @Before + def setUp(): Unit = { + // Create the test table. + val builder = new CreateTableOptions().setNumReplicas(3) + builder.setRangePartitionColumns(ImmutableList.of("key")) + table = harness.getClient.createTable(tableName, getBasicSchema, builder) + } + + @Test + def testSimpleBackupGraph() { + val graph = new BackupGraph(table.getTableId) + val full = createBackupVertex(table, 0, 1) + graph.addBackup(full) + + // Validate a graph with only a single full backup. + assertEquals(1, graph.fullBackups.size) + assertEquals(1, graph.backupPaths.size) + val fullPath = graph.backupPaths.head + assertEquals("0", fullPath.pathString) + + // Validate a graph with a couple incremental backups. + val inc1 = createBackupVertex(table, 1, 2) + graph.addBackup(inc1) + val inc2 = createBackupVertex(table, 2, 3) + graph.addBackup(inc2) + assertEquals(1, graph.fullBackups.size) + assertEquals(1, graph.backupPaths.size) + + val incPath = graph.backupPaths.head + assertEquals("0 -> 1 -> 2", incPath.pathString) + } + + @Test + def testForkingBackupGraph() { + val graph = new BackupGraph(table.getTableId) + val full = createBackupVertex(table, 0, 1) + graph.addBackup(full) + // Duplicate fromMs of 1 creates a fork in the graph. + val inc1 = createBackupVertex(table, 1, 2) + graph.addBackup(inc1) + val inc2 = createBackupVertex(table, 1, 4) + graph.addBackup(inc2) + val inc3 = createBackupVertex(table, 2, 3) + graph.addBackup(inc3) + val inc4 = createBackupVertex(table, 4, 5) + graph.addBackup(inc4) + + assertEquals(1, graph.fullBackups.size) + assertEquals(2, graph.backupPaths.size) + + val path1 = graph.backupPaths.head + assertEquals("0 -> 1 -> 2", path1.pathString) + + val path2 = graph.backupPaths.last + assertEquals("0 -> 1 -> 4", path2.pathString) + + // Ensure the most recent incremental is used for a backup base and restore path. + assertEquals(5, graph.backupBase.metadata.getToMs) + assertEquals(5, graph.restorePath.toMs) + } + + @Test + def testMultiFullBackupGraph() { + val graph = new BackupGraph(table.getTableId) + val full1 = createBackupVertex(table, 0, 1) + graph.addBackup(full1) + val inc1 = createBackupVertex(table, 1, 2) + graph.addBackup(inc1) + val inc2 = createBackupVertex(table, 2, 4) + graph.addBackup(inc2) + + // Add a second full backup. 
+ val full2 = createBackupVertex(table, 0, 4) + graph.addBackup(full2) + val inc3 = createBackupVertex(table, 4, 5) + graph.addBackup(inc3) + + assertEquals(2, graph.fullBackups.size) + assertEquals(2, graph.backupPaths.size) + val path1 = graph.backupPaths.head + assertEquals("0 -> 1 -> 2 -> 4", path1.pathString) + + val path2 = graph.backupPaths.last + assertEquals("0 -> 4", path2.pathString) + + // Ensure the most recent incremental is used for a backup base and restore path. + assertEquals(5, graph.backupBase.metadata.getToMs) + assertEquals(5, graph.restorePath.toMs) + } + + @Test + def testFilterByTime() { + val graph = new BackupGraph(table.getName) + val full1 = createBackupVertex(table, 0, 1) + graph.addBackup(full1) + val inc1 = createBackupVertex(table, 1, 2) + graph.addBackup(inc1) + val inc2 = createBackupVertex(table, 2, 4) + graph.addBackup(inc2) + + // Add a second full backup. + val full2 = createBackupVertex(table, 0, 4) + graph.addBackup(full2) + val inc3 = createBackupVertex(table, 4, 5) + graph.addBackup(inc3) + + val newGraph = graph.filterByTime(2) + + assertEquals(1, newGraph.fullBackups.size) + assertEquals(1, newGraph.backupPaths.size) + } + + private def createBackupVertex(table: KuduTable, fromMs: Long, toMs: Long): BackupNode = { + val metadata = TableMetadata.getTableMetadata(table, fromMs, toMs, "parquet") + BackupNode(null, metadata) + } +} diff --git a/java-scala-spark4/kudu-backup-tools/build.gradle b/java-scala-spark4/kudu-backup-tools/build.gradle new file mode 100644 index 0000000000..2f5bd0cb84 --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/build.gradle @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +apply plugin: "scala" +apply from: "$rootDir/gradle/shadow.gradle" + +dependencies { + compile project(path: ":kudu-backup-common") + compile project(path: ":kudu-client", configuration: "shadow") + compile (libs.scopt) { + // Make sure wrong Scala version is not pulled in. + exclude group: "org.scala-lang", module: "scala-library" + } + compile libs.scalaLibrary + compile libs.slf4jApi + + provided libs.hadoopClient + + optional libs.yetusAnnotations + + testCompile project(path: ":kudu-test-utils", configuration: "shadow") + testCompile libs.hadoopMiniCluster + testCompile libs.junit + testCompile libs.scalatest +} + +// kudu-backup-tools has no public Javadoc. +javadoc { + enabled = false +} + +tasks.withType(com.github.spotbugs.snom.SpotBugsTask) { + // This class causes SpotBugs runtime errors, so we completely ignore it from analysis. 
+ classes = classes.filter { !it.path.contains("BaseTestKuduBackupCleaner") } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-backup-tools/src/main/resources/log4j2.properties b/java-scala-spark4/kudu-backup-tools/src/main/resources/log4j2.properties new file mode 100644 index 0000000000..881d729877 --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/src/main/resources/log4j2.properties @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = error +rootLogger.appenderRefs = stderr +rootLogger.appenderRef.stderr.ref = STDERR \ No newline at end of file diff --git a/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala b/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala new file mode 100644 index 0000000000..1df4a577df --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala @@ -0,0 +1,168 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import java.time.Duration +import java.time.temporal.ChronoUnit +import java.util.Locale + +import org.apache.kudu.backup.BackupCLIOptions.DefaultDryRun +import org.apache.kudu.backup.BackupCLIOptions.DefaultExpirationAge +import org.apache.kudu.backup.BackupCLIOptions.DefaultFormat +import org.apache.kudu.backup.BackupCLIOptions.DefaultListType +import org.apache.kudu.backup.BackupCLIOptions.DefaultVerbose +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import scopt.OptionParser + +object Mode extends Enumeration { + val LIST, CLEAN = Value +} + +// The possible backup CLI tool list types. 
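+// LATEST prints only the most recent backup of each table, RESTORE_SEQUENCE prints the backups a restore of each table would replay, and ALL prints every backup found for each table.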
+@InterfaceAudience.Private +@InterfaceStability.Unstable +object ListType extends Enumeration { + val LATEST, RESTORE_SEQUENCE, ALL = Value +} + +// The possible backup CLI print formats. +@InterfaceAudience.Private +@InterfaceStability.Unstable +object Format extends Enumeration { + val PRETTY, TSV, CSV = Value +} + +@InterfaceAudience.Private +@InterfaceStability.Unstable +case class BackupCLIOptions( + rootPath: String, + mode: Mode.Value, + tables: Seq[String] = Seq(), + listType: ListType.Value = DefaultListType, + format: Format.Value = DefaultFormat, + expirationAge: Duration = DefaultExpirationAge, + dryRun: Boolean = DefaultDryRun, + verbose: Boolean = DefaultVerbose) + +object BackupCLIOptions { + + val DefaultDryRun: Boolean = false + val DefaultExpirationAge: Duration = Duration.of(30, ChronoUnit.DAYS) + val DefaultFormat: Format.Value = Format.PRETTY + val DefaultListType: ListType.Value = ListType.LATEST + val DefaultVerbose: Boolean = false + + val ProgramName: String = "kudu-backup-tools" + + val parser: OptionParser[BackupCLIOptions] = + new OptionParser[BackupCLIOptions](ProgramName) { + + opt[String]("rootPath") + .action((v, o) => o.copy(rootPath = v)) + .text("The root path to search for backups. Accepts any Hadoop compatible path.") + .required() + + arg[String]("...") + .unbounded() + .action((v, o) => o.copy(tables = o.tables :+ v)) + .text("A list of tables to be included. Specifying no tables includes all tables.") + .optional() + + help("help").text("Prints this usage text") + + cmd("list") + .action((_, c) => c.copy(mode = Mode.LIST)) + .text("List the backups in the rootPath.") + .children( + opt[String]("type") + .action((v, o) => o.copy(listType = ListType.withName(v.toUpperCase(Locale.ENGLISH)))) + .text("The type of listing to perform. One of 'latest', 'restore_sequence', 'all'. " + + s"Default: ${DefaultListType.toString.toLowerCase(Locale.ENGLISH)}") + .validate(validateEnumeratedOption( + "type", + ListType.values.map(_.toString.toLowerCase(Locale.ENGLISH)))), + opt[String]("format") + .action((v, o) => o.copy(format = Format.withName(v.toUpperCase(Locale.ENGLISH)))) + .text(s"The output format. One of 'pretty', 'tsv', 'csv'. " + + s"Default: ${DefaultFormat.toString.toLowerCase(Locale.ENGLISH)}") + .validate(validateEnumeratedOption( + "format", + Format.values.map(_.toString.toLowerCase(Locale.ENGLISH)))) + .optional() + ) + + cmd("clean") + .action((_, c) => c.copy(mode = Mode.CLEAN)) + .text("Cleans up old backup data in the rootPath.") + .children( + opt[String]("expirationAgeDays") + .action((v, o) => o.copy(expirationAge = Duration.of(v.toLong, ChronoUnit.DAYS))) + .text("The age at which old backups should be expired. " + + "Backups that are part of the current restore path are never expired. " + + s"Default: ${DefaultExpirationAge.toDays} Days") + .optional(), + opt[Boolean]("dryRun") + .action((v, o) => o.copy(dryRun = v)) + .text("Report on what backups will be deleted, but don't delete anything. " + + s"Overrides --verbose. Default: $DefaultDryRun") + .optional(), + opt[Boolean]("verbose") + .action((v, o) => o.copy(verbose = v)) + .text(s"Report on what backups are deleted. 
Default: $DefaultVerbose") + .optional() + ) + } + + def validateEnumeratedOption( + name: String, + optionStrings: Iterable[String]): String => Either[String, Unit] = + (v: String) => { + if (optionStrings.exists(_.equalsIgnoreCase(v))) { + Right(()) + } else { + Left(s"$name must be one of ${optionStrings.mkString(", ")}: $v") + } + } + + def parse(args: Seq[String]): Option[BackupCLIOptions] = { + parser.parse(args, BackupCLIOptions(null, null)) + } +} + +@InterfaceAudience.Private +@InterfaceStability.Unstable +object KuduBackupCLI { + + // Run the backup CLI tool with the given options. + // Like a command, returns 0 if successful, or a nonzero error code. + def run(options: BackupCLIOptions): Int = { + options.mode match { + case Mode.LIST => KuduBackupLister.run(options) + case Mode.CLEAN => KuduBackupCleaner.run(options) + case _ => throw new IllegalArgumentException("Arguments must come after the command") + } + } + + def main(args: Array[String]): Unit = { + val options = BackupCLIOptions + .parse(args) + .getOrElse(throw new IllegalArgumentException("could not parse the arguments")) + System.exit(run(options)) + } +} diff --git a/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCleaner.scala b/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCleaner.scala new file mode 100644 index 0000000000..1c0f43ac06 --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCleaner.scala @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import java.time.Instant + +import com.google.common.base.Preconditions +import org.apache.hadoop.conf.Configuration +import org.apache.kudu.backup.Backup.TableMetadataPB + +object KuduBackupCleaner { + + private def backupToShortString(metadata: TableMetadataPB): String = { + s"name: ${metadata.getTableName}, id: ${metadata.getTableId}, fromMs: ${metadata.getFromMs}, " + + s"toMs: ${metadata.getToMs}" + } + + // Run the cleanup tool with the given options. Like a command, returns 0 if successful, or + // a nonzero error code. + def run(options: BackupCLIOptions): Int = { + Preconditions.checkArgument(options.mode == Mode.CLEAN) + + // Delete the metadata for all backups that satisfy the following three conditions: + // 1. The table name matches the provided names (does not apply if no names were specified). + // 2. The backup is part of a path whose latest backup is older than the expiration age. + // 3. The backup is not on the current restore path. + // TODO(KUDU-2827): Consider dropped tables eligible for deletion once they reach a certain age. 
+ val io: BackupIO = new BackupIO(new Configuration(), options.rootPath) + val backupGraphs = + if (options.tables.isEmpty) + io.readAllBackupGraphs() + else + io.readBackupGraphsByTableName(options.tables) + val now = Instant.now() + + val tableNameSet = options.tables.toSet + backupGraphs.foreach { graph => + val expiredPaths = graph.backupPaths.filter(path => { + val lastBackupInstant = Instant.ofEpochSecond(path.lastBackup.metadata.getToMs / 1000) + now.isAfter(lastBackupInstant.plus(options.expirationAge)) + }) + + // The graph might be for a table that was once named a name in 'options.tables', but we only + // want to clean up tables whose current name is in 'options.tables'. + // TODO: This is temporary. It will change when pattern support is added. + val currentTableName = graph.restorePath.tableName + if (tableNameSet.isEmpty || tableNameSet.contains(currentTableName)) { + // For each expired path, iterate over it from latest backup to earliest backup and delete + // the backup, unless the backup-to-be-deleted is also part of the restore path. Deleting + // from last to first in the path ensures that if the tool crashes partway through then a + // prefix of the backup path is preserved and the tool can delete the rest of the eligible + // backups next time it runs. + val restoreSet = graph.restorePath.backups.toSet + expiredPaths.foreach(path => { + path.backups + .filterNot(restoreSet.contains) + .reverseMap(backup => { + if (options.dryRun) { + println(s"DRY RUN: Delete backup ${backupToShortString(backup.metadata)}") + } else { + if (options.verbose) { + println(s"Delete backup ${backupToShortString(backup.metadata)}") + } + // TODO(wdberkeley): Make this crash-consistent by handling backup directories + // with no metadata. + io.deleteBackup(backup.metadata) + } + }) + }) + } + } + + 0 + } +} diff --git a/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala b/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala new file mode 100644 index 0000000000..0fe39d567c --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupLister.scala @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import java.text.SimpleDateFormat + +import com.google.common.base.Preconditions +import org.apache.hadoop.conf.Configuration +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability + +@InterfaceAudience.Private +@InterfaceStability.Unstable +object KuduBackupLister { + + // The header for all tables printed by the tool. 
+ val HEADER: Seq[String] = + Seq("table name", "table id", "end time", "start timestamp", "end timestamp", "type") + + // Run the backup CLI tool with the given options. Like a command, returns 0 if successful, or + // a nonzero error code. + def run(options: BackupCLIOptions): Int = { + Preconditions.checkArgument(options.mode == Mode.LIST) + + // Sort by table name for a consistent ordering (at least if there's no duplicate names). + val sortedTables = options.tables.sorted + + val io: BackupIO = new BackupIO(new Configuration(), options.rootPath) + val backupGraphs = + if (sortedTables.isEmpty) + // Sort by table name for a consistent ordering. + io.readAllBackupGraphs().sortBy(_.backupBase.metadata.getTableName) + else + io.readBackupGraphsByTableName(sortedTables) + + options.listType match { + case ListType.LATEST => { + val rows = backupGraphs.map(graph => rowForBackupNode(graph.restorePath.lastBackup)) + printTable(options.format, rows) + } + case ListType.RESTORE_SEQUENCE => { + val tablesOfBackups = + backupGraphs.map(_.restorePath.backups.map(node => rowForBackupNode(node))) + tablesOfBackups.foreach(table => printTable(options.format, table)) + } + case ListType.ALL => { + val tablesOfBackups = backupGraphs.map( + _.allBackups.sortBy(node => node.metadata.getToMs).map(node => rowForBackupNode(node))) + tablesOfBackups.foreach(table => printTable(options.format, table)) + } + } + // Because of renames, one table name might map to multiple backup directories, so it's not + // sufficient to check the size of 'options.tables' against the size of 'backupGraphs'. + val foundTables = backupGraphs.map(graph => graph.backupBase.metadata.getTableName).toSet + val notFoundTables = options.tables.filter(table => !foundTables.contains(table)) + if (notFoundTables.nonEmpty) { + Console.err.println(s"No backups were found for ${notFoundTables.size} table(s):") + notFoundTables.foreach(Console.err.println) + return 1 + } + 0 + } + + private def rowForBackupNode(backup: BackupNode): Seq[String] = { + val metadata = backup.metadata + val tableName = metadata.getTableName + val tableId = metadata.getTableId + val fromMs = metadata.getFromMs + val toMs = metadata.getToMs + val toDatetime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(toMs) + val backupType = if (fromMs == 0) "full" else "incremental" + Seq(tableName, tableId, toDatetime, s"$fromMs", s"$toMs", backupType) + } + + private def formatDsv(delimiter: String, table: Seq[Seq[String]]): String = { + table.map(_.mkString(delimiter)).mkString("\n") + } + + private def formatPrettyTable(table: Seq[Seq[String]]): String = { + if (table.isEmpty) { + return "" + } + // The width of a column is the width of largest cell, plus a padding of 2. + val colWidths = table.transpose.map(_.map(_.length).max + 2) + val rows = table.map { row => + (row, colWidths).zipped + .map { + // 1 space on left, then pad to (padding - 1) spaces. + case (cell, width) => s" %-${width - 1}s".format(cell) + } + .mkString("|") + } + val separatorRow = colWidths.map("-" * _).mkString("+") + (rows.head +: separatorRow +: rows.tail).mkString("\n") + } + + private def printTable(format: Format.Value, rows: Seq[Seq[String]]): Unit = { + if (rows.isEmpty) { + return + } + val table = HEADER +: rows + format match { + case Format.PRETTY => { + println(formatPrettyTable(table)) + } + case Format.TSV => { + println(formatDsv("\t", table)) + } + case Format.CSV => { + println(formatDsv(",", table)) + } + } + println() // Spacing after the table. 
+ } +} diff --git a/java-scala-spark4/kudu-backup-tools/src/test/resources/log4j2.properties b/java-scala-spark4/kudu-backup-tools/src/test/resources/log4j2.properties new file mode 100644 index 0000000000..e85deec999 --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/src/test/resources/log4j2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT + +logger.kudu.name = org.apache.kudu +logger.kudu.level = debug \ No newline at end of file diff --git a/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala b/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala new file mode 100644 index 0000000000..9585147378 --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCleaner.scala @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.backup + +import java.io.File +import java.nio.file.Files +import java.time.temporal.ChronoUnit +import java.time.Duration +import java.time.Instant + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileUtil +import org.apache.hadoop.fs.{Path => HPath} +import org.apache.hadoop.hdfs.MiniDFSCluster +import org.junit.After +import org.junit.Assert._ +import org.junit.Before +import org.junit.Test +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +abstract class BaseTestKuduBackupCleaner { + val log: Logger = LoggerFactory.getLogger(getClass) + + var rootPath: String = _ + + // Return the epoch time in milliseconds that is 'secsBefore' seconds before 'current'. + private def epochMillisBefore(current: Instant, secsBefore: Long): Long = { + current.minus(Duration.of(secsBefore, ChronoUnit.SECONDS)).getEpochSecond * 1000 + } + + @Test + def testBackupCleaner(): Unit = { + val io = new BackupIO(new Configuration(), rootPath) + val expirationAge = Duration.of(15, ChronoUnit.SECONDS) + val now = Instant.now + val tableName = "taco" + + val createPath = (ages: Array[Long]) => { + for (i <- ages.indices) { + val fromMs = if (i == 0) 0 else epochMillisBefore(now, ages(i - 1)) + val toMs = epochMillisBefore(now, ages(i)) + TestUtils.createTableMetadata(io, tableName, fromMs, toMs) + } + } + + // Create a graph of backups for a table incrementally. At first, there'll be one backup path, + // path A, which must therefore be the restore path. All of its backups will be older than the + // expiration age. + val pathA: Array[Long] = Array(25, 21, 16) + createPath(pathA) + + // Nothing should be cleaned up because all backups are on the restore path. + val options = createOptions(rootPath, expirationAge, verbose = true) + assertEquals(0, KuduBackupCleaner.run(options)) + + val backupExists = (secsAgo: Long) => { + val backupPath = + new HPath(io.tablePath(s"id_$tableName", tableName), s"${epochMillisBefore(now, secsAgo)}") + val metadataPath = io.backupMetadataPath(backupPath) + println(s"checking existence of $metadataPath") + io.fs.exists(metadataPath) + } + + assertTrue(pathA.forall(backupExists(_))) + + // Now add a new backup path, path B, that ends at a later time and so is the new restore path. + val pathB: Array[Long] = Array(20, 15, 10, 5, 1) + createPath(pathB) + + // Add a backup with a from time of now - 20 and a to time of now - 18. The backup path that + // ends in this backup is expired, but it forks from the restore path. + TestUtils + .createTableMetadata(io, tableName, epochMillisBefore(now, 20), epochMillisBefore(now, 18)) + + // Running the cleaner should delete path A and the forked backup, but first do a dry run and + // make sure nothing gets deleted. + val dryRunOptions = createOptions(rootPath, expirationAge, dryRun = true) + assertEquals(0, KuduBackupCleaner.run(dryRunOptions)) + + assertTrue(pathA.forall(backupExists(_))) + assertTrue(backupExists(18)) + assertTrue(pathB.forall(backupExists(_))) + + // After the cleaner runs, path A and the forked backup should be deleted and path B should remain. + assertEquals(0, KuduBackupCleaner.run(options)) + assertTrue(pathA.forall(!backupExists(_))) + assertTrue(!backupExists(18)) + assertTrue(pathB.forall(backupExists(_))) + + // Finally, add a third path which is not the restore path but which has backups that are old + // enough to get deleted and backups that are too new to be deleted. 
+ val pathC: Array[Long] = Array(19, 14, 9, 4, 2) + createPath(pathC) + + assertEquals(0, KuduBackupCleaner.run(options)) + assertTrue(pathA.forall(!backupExists(_))) + assertTrue(pathB.forall(backupExists(_))) + assertTrue(pathC.forall(backupExists(_))) + } + + def createOptions( + rootPath: String, + expirationAge: Duration, + tables: Seq[String] = Seq(), + dryRun: Boolean = false, + verbose: Boolean = false): BackupCLIOptions = { + new BackupCLIOptions( + rootPath, + Mode.CLEAN, + tables = tables, + expirationAge = expirationAge, + dryRun = dryRun, + verbose = verbose) + } +} + +class LocalTestKuduBackupCleaner extends BaseTestKuduBackupCleaner { + + @Before + def setUp(): Unit = { + rootPath = Files.createTempDirectory("backupcli").toAbsolutePath.toString + } + + @After + def tearDown(): Unit = { + FileUtils.deleteDirectory(new File(rootPath)) + } +} + +class HDFSTestKuduBackupCleaner extends BaseTestKuduBackupCleaner { + var baseDir: File = _ + + @Before + def setUp(): Unit = { + baseDir = Files.createTempDirectory("hdfs-test").toFile.getAbsoluteFile + + // Create an HDFS mini-cluster. + val conf = new Configuration() + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath) + val hdfsCluster = new MiniDFSCluster.Builder(conf).build() + + // Set the root path to use the HDFS URI. + rootPath = "hdfs://localhost:" + hdfsCluster.getNameNodePort + "/" + } + + @After + def tearDown(): Unit = { + FileUtil.fullyDelete(baseDir) + } +} diff --git a/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala b/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala new file mode 100644 index 0000000000..4802e66ce9 --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupLister.scala @@ -0,0 +1,226 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import java.io.ByteArrayOutputStream +import java.io.File +import java.io.PrintStream +import java.nio.file.Files +import java.text.SimpleDateFormat + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileUtil +import org.apache.hadoop.hdfs.MiniDFSCluster +import org.junit.Assert._ +import org.junit.After +import org.junit.Before +import org.junit.Test +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +abstract class BaseTestKuduBackupLister { + val log: Logger = LoggerFactory.getLogger(getClass) + + var rootPath: String = _ + + // Helper to write a standard collection of backup metadata useful for a few tests. + private def createStandardTableMetadata(io: BackupIO): Unit = { + Seq( + // Two fulls and one incremental for 'taco' table. 
+ ("taco", 0, 100), + ("taco", 0, 1000), + ("taco", 100, 2000), + // One full and two incrementals for 'pizza' table. + ("pizza", 0, 200), + ("pizza", 200, 400), + ("pizza", 400, 600) + ).foreach { + case (tableName: String, fromMs: Int, toMs: Int) => + TestUtils.createTableMetadata(io, tableName, fromMs, toMs) + } + } + + // Helper to format the end time column, since its value depends on the timezone of the machine + // where the tool is run. + private def endTime(toMs: Long): String = { + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(toMs) + } + + @Test + def testListAllBackups(): Unit = { + val io = new BackupIO(new Configuration(), rootPath) + createStandardTableMetadata(io) + + val options = createOptions(rootPath, ListType.ALL) + val stdout = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout, false, "UTF-8")) { + assertEquals(0, KuduBackupCLI.run(options)) + } + + val headerString = KuduBackupLister.HEADER.mkString(",") + val expected = Seq( + headerString, + s"pizza,id_pizza,${endTime(200)},0,200,full", + s"pizza,id_pizza,${endTime(400)},200,400,incremental", + s"pizza,id_pizza,${endTime(600)},400,600,incremental", + "", + headerString, + s"taco,id_taco,${endTime(100)},0,100,full", + s"taco,id_taco,${endTime(1000)},0,1000,full", + s"taco,id_taco,${endTime(2000)},100,2000,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString("UTF-8").trim) + } + + @Test + def testListLatestBackups(): Unit = { + val io = new BackupIO(new Configuration(), rootPath) + createStandardTableMetadata(io) + + val options = createOptions(rootPath, ListType.LATEST) + val stdout = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout, false, "UTF-8")) { + assertEquals(0, KuduBackupCLI.run(options)) + } + + val headerString = KuduBackupLister.HEADER.mkString(",") + val expected = Seq( + headerString, + s"pizza,id_pizza,${endTime(600)},400,600,incremental", + s"taco,id_taco,${endTime(2000)},100,2000,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString("UTF-8").trim) + } + + @Test + def testListRestorePath(): Unit = { + val io = new BackupIO(new Configuration(), rootPath) + createStandardTableMetadata(io) + + val options = createOptions(rootPath, ListType.RESTORE_SEQUENCE) + val stdout = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout, false, "UTF-8")) { + assertEquals(0, KuduBackupCLI.run(options)) + } + + val headerString = KuduBackupLister.HEADER.mkString(",") + val expected = Seq( + headerString, + s"pizza,id_pizza,${endTime(200)},0,200,full", + s"pizza,id_pizza,${endTime(400)},200,400,incremental", + s"pizza,id_pizza,${endTime(600)},400,600,incremental", + "", + headerString, + s"taco,id_taco,${endTime(100)},0,100,full", + s"taco,id_taco,${endTime(2000)},100,2000,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString("UTF-8").trim) + } + + @Test + def testTableFilter(): Unit = { + val io = new BackupIO(new Configuration(), rootPath) + createStandardTableMetadata(io) + + val options = createOptions(rootPath, ListType.ALL, Seq("taco")) + val stdout = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout, false, "UTF-8")) { + assertEquals(0, KuduBackupCLI.run(options)) + } + + val headerString = KuduBackupLister.HEADER.mkString(",") + val expected = Seq( + headerString, + s"taco,id_taco,${endTime(100)},0,100,full", + s"taco,id_taco,${endTime(1000)},0,1000,full", + s"taco,id_taco,${endTime(2000)},100,2000,incremental" + ).mkString("\n") + assertEquals(expected, 
stdout.toString("UTF-8").trim) + } + + @Test + def testMissingTable(): Unit = { + val io = new BackupIO(new Configuration(), rootPath) + createStandardTableMetadata(io) + + val options = createOptions(rootPath, ListType.ALL, Seq("pizza", "nope")) + val stdout = new ByteArrayOutputStream + val stderr = new ByteArrayOutputStream + Console.withOut(new PrintStream(stdout, false, "UTF-8")) { + Console.withErr(new PrintStream(stderr, false, "UTF-8")) { + assertEquals(1, KuduBackupCLI.run(options)) + } + } + + val headerString = KuduBackupLister.HEADER.mkString(",") + val expected = Seq( + headerString, + s"pizza,id_pizza,${endTime(200)},0,200,full", + s"pizza,id_pizza,${endTime(400)},200,400,incremental", + s"pizza,id_pizza,${endTime(600)},400,600,incremental" + ).mkString("\n") + assertEquals(expected, stdout.toString("UTF-8").trim) + + assertEquals("No backups were found for 1 table(s):\nnope", stderr.toString("UTF-8").trim) + } + + def createOptions( + rootPath: String, + listType: ListType.Value, + tables: Seq[String] = Seq(), + format: Format.Value = Format.CSV): BackupCLIOptions = { + new BackupCLIOptions(rootPath, Mode.LIST, tables = tables, listType = listType, format = format) + } +} + +class LocalTestKuduBackupLister extends BaseTestKuduBackupLister { + + @Before + def setUp(): Unit = { + rootPath = Files.createTempDirectory("local-test").toAbsolutePath.toString + } + + @After + def tearDown(): Unit = { + FileUtils.deleteDirectory(new File(rootPath)) + } +} + +class HDFSTestKuduBackupLister extends BaseTestKuduBackupLister { + + var baseDir: File = _ + + @Before + def setUp(): Unit = { + baseDir = Files.createTempDirectory("hdfs-test").toFile.getAbsoluteFile + + // Create an HDFS mini-cluster. + val conf = new Configuration() + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath) + val hdfsCluster = new MiniDFSCluster.Builder(conf).build() + + // Set the root path to use the HDFS URI. + rootPath = "hdfs://localhost:" + hdfsCluster.getNameNodePort + "/" + } + + @After + def tearDown(): Unit = { + FileUtil.fullyDelete(baseDir) + } +} diff --git a/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestUtils.scala b/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestUtils.scala new file mode 100644 index 0000000000..f681791d24 --- /dev/null +++ b/java-scala-spark4/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestUtils.scala @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import org.apache.hadoop.fs.Path + +import org.apache.kudu.backup.Backup.TableMetadataPB +import org.apache.kudu.backup.TableMetadata.MetadataVersion + +object TestUtils { + + // Create dummy table metadata and write it to the test directory. 
+ def createTableMetadata(io: BackupIO, tableName: String, fromMs: Long, toMs: Long): Unit = { + // Create dummy table metadata with just enough information to be used to create a BackupGraph. + val tableId = s"id_$tableName" + val metadata = TableMetadataPB + .newBuilder() + .setVersion(MetadataVersion) + .setFromMs(fromMs) + .setToMs(toMs) + .setTableName(tableName) + .setTableId(tableId) + .build() + val backupPath = new Path(io.tablePath(tableId, tableName), s"$toMs") + val metadataPath = io.backupMetadataPath(backupPath) + io.writeTableMetadata(metadata, metadataPath) + } +} diff --git a/java-scala-spark4/kudu-backup/build.gradle b/java-scala-spark4/kudu-backup/build.gradle new file mode 100644 index 0000000000..ec6f521650 --- /dev/null +++ b/java-scala-spark4/kudu-backup/build.gradle @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +apply plugin: "scala" +apply from: "$rootDir/gradle/shadow.gradle" + +dependencies { + // Note: We don't use the shaded version, so we can control the dependencies. + compile(project(path: ":kudu-backup-common")) { + // Ensure we use the hadoop-client provided by Spark to avoid any compatibility issues. + exclude group: "org.apache.hadoop", module: "hadoop-common" + // Ensure we use the scala-library provided by Spark to avoid any compatibility issues. + exclude group: "org.scala-lang", module: "scala-library" + } + compile project(path: ":kudu-client", configuration: "shadow") + compile project(path: ":kudu-spark", configuration: "shadow") + compile (libs.scopt) { + // Make sure wrong Scala version is not pulled in. + exclude group: "org.scala-lang", module: "scala-library" + } + // TODO(KUDU-2500): Spark uses reflection which requires the annotations at runtime. + compile libs.yetusAnnotations + + + provided libs.scalaLibrary + provided libs.sparkAvro + provided libs.sparkCore + provided libs.sparkSql + provided libs.slf4jApi + provided libs.scalaParallelCollections + + testCompile project(path: ":kudu-test-utils", configuration: "shadow") + testCompile project(path: ":kudu-spark", configuration: "test") + testCompile libs.junit + testCompile libs.log4jApi + testCompile libs.log4jCore + testCompile libs.log4jSlf4jImpl + testCompile libs.scalatest + +} + +// Adjust the artifact name to match the maven build. +archivesBaseName = "kudu-backup${versions.sparkBase}_${versions.scalaBase}" + +// kudu-backup has no public Javadoc. 
+javadoc { + enabled = false +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/BackupUtils.scala b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/BackupUtils.scala new file mode 100644 index 0000000000..7701134f22 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/BackupUtils.scala @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import org.apache.kudu.Schema +import org.apache.kudu.spark.kudu.SparkUtil +import org.apache.spark.sql.types.ByteType +import org.apache.spark.sql.types.StructField +import org.apache.spark.sql.types.StructType + +object BackupUtils { + + /** + * Returns the Spark schema for backup data based on the Kudu Schema. + * Additionally handles adding the RowAction column for incremental backup/restore. + */ + def dataSchema(schema: Schema, includeRowAction: Boolean = true): StructType = { + var fields = SparkUtil.sparkSchema(schema).fields + if (includeRowAction) { + val changeTypeField = generateRowActionColumn(schema) + fields = fields ++ Seq(changeTypeField) + } + StructType(fields) + } + + /** + * Generates a RowAction column and handles column name collisions. + * The column name can vary because it's accessed positionally. + */ + private def generateRowActionColumn(schema: Schema): StructField = { + val columnName = new StringBuffer("backup_row_action") + // If the column already exists and we need to pick an alternate column name. + while (schema.hasColumn(columnName.toString)) { + columnName.append("_") + } + StructField(columnName.toString, ByteType) + } + +} diff --git a/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackup.scala b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackup.scala new file mode 100644 index 0000000000..7813cd1989 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackup.scala @@ -0,0 +1,185 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import org.apache.kudu.spark.kudu.KuduContext +import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.SparkSession +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +import java.util.concurrent.ForkJoinPool +import scala.collection.parallel.CollectionConverters.seqIsParallelizable +import scala.jdk.CollectionConverters._ +import scala.collection.parallel.ForkJoinTaskSupport +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +/** + * The main class for a Kudu backup spark job. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +object KuduBackup { + val log: Logger = LoggerFactory.getLogger(getClass) + + private def doBackup( + tableName: String, + context: KuduContext, + session: SparkSession, + io: BackupIO, + options: BackupOptions, + backupMap: Map[String, BackupGraph]): Unit = { + var tableOptions = options.copy() // Copy the options so we can modify them for the table. + val table = context.syncClient.openTable(tableName) + val tableId = table.getTableId + val backupPath = io.backupPath(tableId, tableName, tableOptions.toMs) + val metadataPath = io.backupMetadataPath(backupPath) + log.info(s"Backing up table $tableName to path: $backupPath") + + // Unless we are forcing a full backup or a fromMs was set, find the previous backup and + // use the `to_ms` metadata as the `from_ms` time for this backup. + var incremental = false + if (tableOptions.forceFull) { + log.info("Performing a full backup: forceFull was set to true") + } else if (tableOptions.fromMs != BackupOptions.DefaultFromMS) { + log.info(s"Performing an incremental backup: fromMs was set to ${tableOptions.fromMs}") + incremental = true + } else { + log.info("Looking for a previous backup: forceFull and fromMs options are not set.") + if (backupMap.contains(tableId) && backupMap(tableId).hasFullBackup) { + val base = backupMap(tableId).backupBase + log.info(s"Setting fromMs to ${base.metadata.getToMs} from backup in path: ${base.path}") + tableOptions = tableOptions.copy(fromMs = base.metadata.getToMs) + incremental = true + } else { + log.info("No previous backup was found. Starting a full backup.") + tableOptions = tableOptions.copy(forceFull = true) + } + } + + val jobTypeStr = if (incremental) "incremental" else "full" + session.sparkContext.setJobDescription(s"Kudu Backup($jobTypeStr): $tableName") + + val rdd = new KuduBackupRDD(table, tableOptions, incremental, context, session.sparkContext) + val df = + session.sqlContext + .createDataFrame(rdd, BackupUtils.dataSchema(table.getSchema, incremental)) + + // Ensure maximum compatibility for dates before 1582-10-15 or timestamps before + // 1900-01-01T00:00:00Z in Parquet. Otherwise incorrect values may be read by + // Spark 2 or legacy version of Hive. See more details in SPARK-31404. + session.conf.set("spark.sql.legacy.parquet.datetimeRebaseModeInWrite", "LEGACY") + session.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "LEGACY") + + // Write the data to the backup path. + // The backup path contains the timestampMs and should not already exist. + val writer = df.write.mode(SaveMode.ErrorIfExists) + writer + .format(tableOptions.format) + .save(backupPath.toString) + + // Generate and output the new metadata for this table. 
+ // The existence of metadata indicates this backup was successful. + val tableMetadata = TableMetadata + .getTableMetadata(table, tableOptions.fromMs, tableOptions.toMs, tableOptions.format) + io.writeTableMetadata(tableMetadata, metadataPath) + } + + def run(options: BackupOptions, session: SparkSession): Boolean = { + // Set the job group for all the spark backup jobs. + // Note: The job description will be overridden by each Kudu table job. + session.sparkContext.setJobGroup(s"Kudu Backup @ ${options.toMs}", "Kudu Backup") + + log.info(s"Backing up to root path: ${options.rootPath}") + val context = + new KuduContext( + options.kuduMasterAddresses, + session.sparkContext + ) + val io = new BackupIO(session.sparkContext.hadoopConfiguration, options.rootPath) + + // Read the required backup metadata. + val backupGraphs = + // Only read the backup metadata if it will be used. + if (!options.forceFull || options.fromMs != BackupOptions.DefaultFromMS) { + // Convert the input table names to be backed up into table IDs. + // This will allow us to link against old backup data by referencing + // the static table ID even when the table name changes between backups. + val nameToId = context.syncClient.getTablesList.getTableInfosList.asScala + .filter(info => options.tables.contains(info.getTableName)) + .map(info => (info.getTableName, info.getTableId)) + .toMap + val tableIds = options.tables.flatMap(nameToId.get) + io.readBackupGraphsByTableId(tableIds) + } else { + Seq[BackupGraph]() + } + // Key the backupMap by the table ID. + val backupMap = backupGraphs.map { graph => + (graph.tableId, graph) + }.toMap + + // Parallelize the processing. Managing resources of parallel backup jobs is very complex, so + // only the simplest possible thing is attempted. Kudu trusts Spark to manage resources. + val parallelTables = options.tables.toList.par + parallelTables.tasksupport = new ForkJoinTaskSupport( + new ForkJoinPool(options.numParallelBackups)) + val backupResults = parallelTables.map { tableName => + val backupResult = Try(doBackup(tableName, context, session, io, options, backupMap)) + backupResult match { + case Success(()) => + log.info(s"Successfully backed up table $tableName") + case Failure(ex) => + if (options.numParallelBackups == 1 && options.failOnFirstError) + throw ex + else + log.error(s"Failed to back up table $tableName", ex) + } + (tableName, backupResult) + } + + backupResults.filter(_._2.isFailure).foreach { + case (tableName, ex) => + log.error( + s"Failed to back up table $tableName: Look back in the logs for the full exception. 
Error: ${ex.toString}") + } + !backupResults.exists(_._2.isFailure) + } + + def main(args: Array[String]): Unit = { + val options = BackupOptions + .parse(args) + .getOrElse(throw new IllegalArgumentException("could not parse the arguments")) + + val session = SparkSession + .builder() + .appName("Kudu Table Backup") + .getOrCreate() + + val isRunSuccessful: Boolean = run(options, session) + if (!isRunSuccessful) { + throw new RuntimeException("Kudu Table Backup application failed") + } + + session.stop() + } +} diff --git a/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala new file mode 100644 index 0000000000..6e87e05fa3 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import java.util.concurrent.TimeUnit + +import org.apache.kudu.client.AsyncKuduScanner.ReadMode +import org.apache.kudu.client.KuduScannerIterator.NextRowsCallback +import org.apache.kudu.client._ +import org.apache.kudu.spark.kudu.KuduContext +import org.apache.kudu.util.HybridTimeUtil +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.apache.spark.Partition +import org.apache.spark.SparkContext +import org.apache.spark.TaskContext +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability + +import scala.collection.JavaConverters._ + +@InterfaceAudience.Private +@InterfaceStability.Unstable +@SerialVersionUID(1L) +class KuduBackupRDD private[kudu] ( + @transient val table: KuduTable, + @transient val options: BackupOptions, + val incremental: Boolean, + val kuduContext: KuduContext, + @transient val sc: SparkContext) + extends RDD[Row](sc, Nil) { + + override protected def getPartitions: Array[Partition] = { + val client = kuduContext.syncClient + + val builder = client + .newScanTokenBuilder(table) + .cacheBlocks(false) + .replicaSelection(ReplicaSelection.CLOSEST_REPLICA) + .readMode(ReadMode.READ_AT_SNAPSHOT) + .batchSizeBytes(options.scanBatchSize) + .scanRequestTimeout(options.scanRequestTimeoutMs) + .prefetching(options.scanPrefetching) + .keepAlivePeriodMs(options.keepAlivePeriodMs) + // TODO(KUDU-3135): Make backup scans a bit more resilient to column renames given these + // jobs are often critical, longer running, and scheduled in bulk. Once scans with + // provided table metadata better handle column renames this can be removed. 
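Stepping back to KuduBackup.run for a moment: the per-table fan-out relies on a Scala parallel collection whose task support is a fixed-size ForkJoinPool, so --numParallelBackups directly bounds how many tables are processed at once, and per-table failures are collected rather than thrown. A self-contained sketch of that pattern with placeholder work instead of real table backups (assumes the scala-parallel-collections module on Scala 2.13):

[source,scala]
----
import java.util.concurrent.ForkJoinPool

import scala.collection.parallel.CollectionConverters._
import scala.collection.parallel.ForkJoinTaskSupport
import scala.util.Try

object ParallelTablesSketch {
  def main(args: Array[String]): Unit = {
    val tables = List("table_a", "table_b", "table_c", "table_d")
    val parallelism = 2

    // Bound the parallel collection with an explicitly sized pool.
    val parTables = tables.par
    parTables.tasksupport = new ForkJoinTaskSupport(new ForkJoinPool(parallelism))

    // Capture per-element failures instead of throwing, mirroring how the job
    // collects per-table results before deciding overall success.
    val results = parTables.map(t => (t, Try(println(s"processing $t"))))
    val failures = results.filter { case (_, r) => r.isFailure }
    println(s"${results.size - failures.size} succeeded, ${failures.size} failed")
  }
}
----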
+ .includeTableMetadata(false) + + options.splitSizeBytes.foreach { size => + builder.setSplitSizeBytes(size) + } + + // Set a hybrid time for the scan to ensure application consistency. + val toMicros = TimeUnit.MILLISECONDS.toMicros(options.toMs) + val toHTT = + HybridTimeUtil.physicalAndLogicalToHTTimestamp(toMicros, 0) + + if (incremental) { + val fromMicros = TimeUnit.MILLISECONDS.toMicros(options.fromMs) + val fromHTT = + HybridTimeUtil.physicalAndLogicalToHTTimestamp(fromMicros, 0) + builder.diffScan(fromHTT, toHTT) + } else { + builder.snapshotTimestampRaw(toHTT) + } + + // Create the scan tokens for each partition. + val tokens = builder.build() + tokens.asScala.zipWithIndex.map { + case (token, index) => + // Only list the leader replica as the preferred location if + // replica selection policy is leader only, to take advantage + // of scan locality. + val locations: Array[String] = { + if (options.scanLeaderOnly) { + Array(token.getTablet.getLeaderReplica.getRpcHost) + } else { + token.getTablet.getReplicas.asScala.map(_.getRpcHost).toArray + } + } + KuduBackupPartition(index, token.serialize(), locations) + }.toArray + } + + override def compute(part: Partition, taskContext: TaskContext): Iterator[Row] = { + val client: KuduClient = kuduContext.syncClient + val partition: KuduBackupPartition = part.asInstanceOf[KuduBackupPartition] + val scanner = + KuduScanToken.deserializeIntoScanner(partition.scanToken, client) + // We don't store the RowResult so we can enable the reuseRowResult optimization. + scanner.setReuseRowResult(true) + new RowIterator(scanner, kuduContext, incremental) + } + + override def getPreferredLocations(partition: Partition): Seq[String] = { + partition.asInstanceOf[KuduBackupPartition].locations + } +} + +private case class KuduBackupPartition(index: Int, scanToken: Array[Byte], locations: Array[String]) + extends Partition + +/** + * This iterator wraps a KuduScanner, converts the returned RowResults into a + * Spark Row, and allows iterating over those scanned results. + * + * The Spark RDD abstraction has an abstract compute method, implemented in KuduBackupRDD, + * that takes the job partitions and task context and expects to return an Iterator[Row]. + * This implementation facilitates that. + */ +private class RowIterator( + private val scanner: KuduScanner, + val kuduContext: KuduContext, + val incremental: Boolean) + extends Iterator[Row] { + + private val scannerIterator = scanner.iterator() + private val nextRowsCallback = new NextRowsCallback { + override def call(numRows: Int): Unit = { + if (TaskContext.get().isInterrupted()) { + throw new RuntimeException("Kudu task interrupted") + } + kuduContext.timestampAccumulator.add(kuduContext.syncClient.getLastPropagatedTimestamp) + } + } + + override def hasNext: Boolean = { + scannerIterator.hasNext(nextRowsCallback) + } + + override def next(): Row = { + val rowResult = scannerIterator.next() + val fieldCount = rowResult.getColumnProjection.getColumnCount + // If this is an incremental backup, the last column is the is_deleted column. + val columnCount = if (incremental) fieldCount - 1 else fieldCount + val columns = Array.ofDim[Any](fieldCount) + for (i <- 0 until columnCount) { + columns(i) = rowResult.getObject(i) + } + // If this is an incremental backup, translate the is_deleted column into + // the "change_type" column as the last field. 
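Also worth noting from getPartitions above: every scan is pinned to a hybrid-time snapshot. The millisecond options are converted to microseconds and then to a HybridTime timestamp, so a full backup becomes a snapshot scan at toMs while an incremental becomes a diff scan between fromMs and toMs. A small sketch of just that conversion using the same utility class; the millisecond values are made up.

[source,scala]
----
import java.util.concurrent.TimeUnit

import org.apache.kudu.util.HybridTimeUtil

object SnapshotTimestampSketch {
  // Convert a wall-clock time in milliseconds to a HybridTime timestamp
  // with a logical component of zero, as KuduBackupRDD does.
  def toHybridTime(ms: Long): Long =
    HybridTimeUtil.physicalAndLogicalToHTTimestamp(TimeUnit.MILLISECONDS.toMicros(ms), 0)

  def main(args: Array[String]): Unit = {
    val fromMs = 1700000000000L // previous backup's toMs (incremental backups only)
    val toMs = 1700000600000L   // this backup's snapshot time
    // A full backup would use builder.snapshotTimestampRaw(toHybridTime(toMs));
    // an incremental would use builder.diffScan(toHybridTime(fromMs), toHybridTime(toMs)).
    println(s"fromHT=${toHybridTime(fromMs)} toHT=${toHybridTime(toMs)}")
  }
}
----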
+ if (incremental) { + val rowAction = if (rowResult.isDeleted) { + RowAction.DELETE.getValue + } else { + // If the row is not deleted, we do not know if it was inserted or updated, + // so we use upsert. + RowAction.UPSERT.getValue + } + columns(fieldCount - 1) = rowAction + } + Row.fromSeq(columns) + } +} diff --git a/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduRestore.scala b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduRestore.scala new file mode 100644 index 0000000000..112edacb50 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduRestore.scala @@ -0,0 +1,429 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup + +import org.apache.kudu.backup.Backup.TableMetadataPB +import org.apache.kudu.client.AlterTableOptions +import org.apache.kudu.client.KuduPartitioner +import org.apache.kudu.client.Partition +import org.apache.kudu.client.RangePartitionBound +import org.apache.kudu.client.RangePartitionWithCustomHashSchema +import org.apache.kudu.client.SessionConfiguration.FlushMode +import org.apache.kudu.spark.kudu.KuduContext +import org.apache.kudu.spark.kudu.RowConverter +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.SparkSession +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +import scala.collection.parallel.ForkJoinTaskSupport +import java.util.concurrent.ForkJoinPool +import scala.collection.parallel.CollectionConverters.seqIsParallelizable +import scala.jdk.CollectionConverters._ +import scala.util.Failure +import scala.util.Success +import scala.util.Try + +/** + * The main class for a Kudu restore spark job. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +object KuduRestore { + val log: Logger = LoggerFactory.getLogger(getClass) + val ImpalaPrefix = "impala::" + + /** + * Returns the table name in which the data will be restored considering the flags removeImpalaPrefix, + * newDatabaseName and tableSuffix + */ + def getRestoreTableName(fullTableName: String, options: RestoreOptions): String = { + // Break the table down into prefix::databaseName.tableName + var prefix = "" + var databaseName = "" + var tableName = fullTableName + val hasImpalaPrefix = tableName.startsWith(ImpalaPrefix) + if (hasImpalaPrefix) { + prefix = ImpalaPrefix + tableName = tableName.substring(ImpalaPrefix.length) + } + val hasDatabase = tableName.contains(".") + if (hasDatabase) { + databaseName = tableName.substring(0, tableName.indexOf(".") + 1) + tableName = tableName.substring(tableName.indexOf(".") + 1) + } + + // If the user does not want the Impala prefix, drop it + if (options.removeImpalaPrefix) { + prefix = "" + } + + // If there is a databaseName specified by the user, use that + if (options.newDatabaseName.nonEmpty) { + databaseName = options.newDatabaseName.concat(".") + } + + s"${prefix}${databaseName}${tableName}${options.tableSuffix}" + } + + private def doRestore( + tableName: String, + context: KuduContext, + session: SparkSession, + io: BackupIO, + options: RestoreOptions, + backupMap: Map[String, BackupGraph]): Unit = { + if (!backupMap.contains(tableName)) { + throw new RuntimeException(s"No valid backups found for table: $tableName") + } + val graph = backupMap(tableName) + val restorePath = graph.restorePath + val lastMetadata = restorePath.backups.last.metadata + val restoreName = getRestoreTableName(lastMetadata.getTableName, options) + val numJobs = restorePath.backups.size + var currentJob = 1 + restorePath.backups.foreach { backup => + session.sparkContext.setJobDescription(s"Kudu Restore($currentJob/$numJobs): $tableName") + + log.info(s"Restoring table $tableName from path: ${backup.path}") + val metadata = backup.metadata + val isFullRestore = metadata.getFromMs == 0 + // TODO (KUDU-2788): Store the full metadata to compare/validate for each applied partial. + + // On the full restore we may need to create the table. + if (isFullRestore) { + if (options.createTables) { + log.info(s"Creating restore table $restoreName") + // We use the last schema in the restore path when creating the table to + // ensure the table is created in its final state. + createTableRangePartitionByRangePartition( + restoreName, + lastMetadata, + options.restoreOwner, + context) + } + } + val backupSchema = BackupUtils.dataSchema(TableMetadata.getKuduSchema(metadata)) + val rowActionCol = backupSchema.fields.last.name + + var data = session.sqlContext.read + .format(metadata.getDataFormat) + .schema(backupSchema) + .load(backup.path.toString) + // Default the the row action column with a value of "UPSERT" so that the + // rows from a full backup, which don't have a row action, are upserted. + // TODO(ghenke): Consider using INSERT_IGNORE for full backups. + .na + .fill(RowAction.UPSERT.getValue, Seq(rowActionCol)) + + // Adjust for dropped and renamed columns. + data = adjustSchema(data, metadata, lastMetadata, rowActionCol) + val restoreSchema = data.schema + + // Write the data to Kudu. 
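getRestoreTableName above is pure string manipulation: strip or keep the "impala::" prefix, optionally substitute the database, then append the configured suffix. The sketch below repeats those steps standalone, which is handy for predicting restored table names; the inputs are illustrative only.

[source,scala]
----
object RestoreNameSketch {
  private val ImpalaPrefix = "impala::"

  def restoreName(
      fullName: String,
      removeImpalaPrefix: Boolean,
      newDatabaseName: String,
      tableSuffix: String): String = {
    var prefix = ""
    var database = ""
    var table = fullName
    if (table.startsWith(ImpalaPrefix)) {
      prefix = ImpalaPrefix
      table = table.substring(ImpalaPrefix.length)
    }
    if (table.contains(".")) {
      database = table.substring(0, table.indexOf(".") + 1)
      table = table.substring(table.indexOf(".") + 1)
    }
    if (removeImpalaPrefix) prefix = ""
    if (newDatabaseName.nonEmpty) database = newDatabaseName + "."
    s"$prefix$database$table$tableSuffix"
  }

  def main(args: Array[String]): Unit = {
    // Prints "newdb.t-restore": prefix stripped, database replaced, suffix appended.
    println(restoreName("impala::db.t", removeImpalaPrefix = true, "newdb", "-restore"))
    // Prints "t-restore": a bare table name is only suffixed.
    println(restoreName("t", removeImpalaPrefix = false, "", "-restore"))
  }
}
----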
+ data.queryExecution.toRdd.foreachPartition { internalRows => + val table = context.syncClient.openTable(restoreName) + val converter = new RowConverter(table.getSchema, restoreSchema, false) + val partitioner = createPartitionFilter(metadata, lastMetadata) + val session = context.syncClient.newSession + session.setFlushMode(FlushMode.AUTO_FLUSH_BACKGROUND) + // In the case of task retries we need to ignore NotFound errors for deleted rows. + // This can't occur if DELETE_IGNORE is used, but still needs to be set in the case + // DELETE is used for backwards compatibility. + session.setIgnoreAllNotFoundRows(true) + try for (internalRow <- internalRows) { + // Convert the InternalRows to Rows. + // This avoids any corruption as reported in SPARK-26880. + val row = converter.toRow(internalRow) + // Get the operation type based on the row action column. + // This will always be the last column in the row. + val rowActionValue = row.getByte(row.length - 1) + val rowAction = RowAction.fromValue(rowActionValue) + // Generate an operation based on the row action. + val operation = rowAction match { + case RowAction.UPSERT => table.newUpsert() + case RowAction.DELETE => { + if (context.supportsIgnoreOperations) { + table.newDeleteIgnore() + } else { + table.newDelete() + } + } + case _ => throw new IllegalStateException(s"Unsupported RowAction: $rowAction") + } + // Convert the Spark row to a partial row and set it on the operation. + val partialRow = converter.toPartialRow(row) + // Drop rows that are not covered by the partitioner. This is how we + // detect a partition which was dropped between backups and filter + // out the rows from that dropped partition. + if (partitioner.isCovered(partialRow)) { + operation.setRow(partialRow) + session.apply(operation) + } + } finally { + session.close() + } + // Fail the task if there are any errors. + // It is important to capture all of the errors via getRowErrors and then check + // the length because each call to session.getPendingErrors clears the ErrorCollector. + val pendingErrors = session.getPendingErrors + if (pendingErrors.getRowErrors.nonEmpty) { + val errors = pendingErrors.getRowErrors + val sample = errors.take(5).map(_.getErrorStatus).mkString + if (pendingErrors.isOverflowed) { + throw new RuntimeException( + s"PendingErrors overflowed. Failed to write at least ${errors.length} rows " + + s"to Kudu; Sample errors: $sample") + } else { + throw new RuntimeException( + s"Failed to write ${errors.length} rows to Kudu; Sample errors: $sample") + } + } + } + currentJob += 1 + } + } + + def run(options: RestoreOptions, session: SparkSession): Boolean = { + // Set the job group for all the spark restore jobs. + // Note: The job description will be overridden by each Kudu table job. + session.sparkContext.setJobGroup(s"Kudu Restore @ ${options.timestampMs}", "Kudu Restore") + + log.info(s"Restoring from path: ${options.rootPath}") + val context = + new KuduContext( + options.kuduMasterAddresses, + session.sparkContext + ) + val io = new BackupIO(session.sparkContext.hadoopConfiguration, options.rootPath) + + // Read the required backup metadata. + val backupGraphs = io.readBackupGraphsByTableName(options.tables, options.timestampMs) + + // Key the backupMap by the last table name. + val backupMap = backupGraphs + .groupBy(_.restorePath.tableName) + .mapValues(_.maxBy(_.restorePath.toMs)) + + // Parallelize the processing. Managing resources of parallel restore jobs is very complex, so + // only the simplest possible thing is attempted. 
Kudu trusts Spark to manage resources. + // TODO (KUDU-2832): If the job fails to restore a table it may still create the table, which + // will cause subsequent restores to fail unless the table is deleted or the restore suffix is + // changed. We ought to try to clean up the mess when a failure happens. + val parallelTables = options.tables.par + parallelTables.tasksupport = new ForkJoinTaskSupport( + new ForkJoinPool(options.numParallelRestores)) + val restoreResults = parallelTables.map { tableName => + val restoreResult = + Try(doRestore(tableName, context, session, io, options, backupMap.toMap)) + restoreResult match { + case Success(()) => + log.info(s"Successfully restored table $tableName") + case Failure(ex) => + if (options.numParallelRestores == 1 && options.failOnFirstError) + throw ex + else + log.error(s"Failed to restore table $tableName", ex) + } + (tableName, restoreResult) + } + + restoreResults.filter(_._2.isFailure).foreach { + case (tableName, ex) => + log.error( + s"Failed to restore table $tableName: Look back in the logs for the full exception. Error: ${ex.toString}") + } + !restoreResults.exists(_._2.isFailure) + } + + // Kudu isn't good at creating a lot of tablets at once, and by default tables may only be created + // with at most 60 tablets. Additional tablets can be added later by adding range partitions. So, + // to restore tables with more tablets than that, we need to create the table piece-by-piece. This + // does so in the simplest way: creating the table with the first range partition, if there is + // one, and then altering it to add the rest of the partitions, one partition at a time. + private def createTableRangePartitionByRangePartition( + restoreName: String, + metadata: TableMetadataPB, + restoreOwner: Boolean, + context: KuduContext): Unit = { + // Create the table with the first range partition (or none if there are none). + val schema = TableMetadata.getKuduSchema(metadata) + val options = TableMetadata.getCreateTableOptionsWithoutRangePartitions(metadata, restoreOwner) + // Returns the range bounds of the ranges that contain the table wide hash schema. + val boundsWithoutHashSchema = TableMetadata.getRangeBoundPartialRows(metadata) + // Returns the range bounds and hash schema of the ranges that contain a custom hash schema. + val boundsWithCustomHashSchema = + TableMetadata.getRangeBoundsPartialRowsWithHashSchemas(metadata) + if (boundsWithoutHashSchema.nonEmpty) { + // Adds the first range partition with table wide hash schema through create. + boundsWithoutHashSchema.headOption.foreach(bound => { + val (lower, upper) = bound + options.addRangePartition(lower, upper) + }) + context.createTable(restoreName, schema, options) + + // Add the rest of the range partitions with table wide hash schema through alters. + boundsWithoutHashSchema.tail.foreach(bound => { + val (lower, upper) = bound + val options = new AlterTableOptions() + options.addRangePartition(lower, upper) + context.syncClient.alterTable(restoreName, options) + }) + + // Adds range partitions with custom hash schema through alters. 
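The create-then-alter sequence above exists because a table can only be created with a limited number of tablets, so the restore creates the table with at most one range partition and adds the remaining ranges through alter operations. A trimmed sketch of that pattern follows; the client, schema, range column name, and bounds are placeholders rather than values from this patch.

[source,scala]
----
import scala.jdk.CollectionConverters._

import org.apache.kudu.Schema
import org.apache.kudu.client.{AlterTableOptions, CreateTableOptions, KuduClient, PartialRow}

object CreateThenAlterSketch {
  // Create the table with the first range partition (if any), then add the
  // remaining ranges one alter at a time, mirroring the restore path above.
  def createWithRanges(
      client: KuduClient,
      name: String,
      schema: Schema,
      bounds: Seq[(PartialRow, PartialRow)]): Unit = {
    val create = new CreateTableOptions().setRangePartitionColumns(List("key").asJava)
    bounds.headOption.foreach { case (lower, upper) => create.addRangePartition(lower, upper) }
    client.createTable(name, schema, create)

    bounds.drop(1).foreach {
      case (lower, upper) =>
        val alter = new AlterTableOptions().addRangePartition(lower, upper)
        client.alterTable(name, alter)
    }
  }
}
----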
+ boundsWithCustomHashSchema.foreach(bound => { + val rangePartition = new RangePartitionWithCustomHashSchema( + bound.lowerBound, + bound.upperBound, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + bound.hashSchemas.asScala.foreach { hp => + val columnNames = hp.getColumnIds.asScala.map { id => + schema.getColumnByIndex(id).getName + } + rangePartition.addHashPartitions(columnNames.asJava, hp.getNumBuckets, hp.getSeed) + } + val options = new AlterTableOptions() + options.addRangePartition(rangePartition) + context.syncClient.alterTable(restoreName, options) + }) + } else if (boundsWithCustomHashSchema.nonEmpty) { + // Adds first range partition with custom hash schema through create. + boundsWithCustomHashSchema.headOption.foreach(bound => { + val rangePartition = new RangePartitionWithCustomHashSchema( + bound.lowerBound, + bound.upperBound, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + bound.hashSchemas.asScala.foreach { hp => + val columnNames = hp.getColumnIds.asScala.map { id => + schema.getColumnByIndex(id).getName + } + rangePartition.addHashPartitions(columnNames.asJava, hp.getNumBuckets, hp.getSeed) + } + options.addRangePartition(rangePartition) + }) + context.createTable(restoreName, schema, options) + // Adds rest of range partitions with custom hash schema through alters. + boundsWithCustomHashSchema.tail.foreach(bound => { + val rangePartition = new RangePartitionWithCustomHashSchema( + bound.lowerBound, + bound.upperBound, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + bound.hashSchemas.asScala.foreach { hp => + val columnNames = hp.getColumnIds.asScala.map { id => + schema.getColumnByIndex(id).getName + } + rangePartition.addHashPartitions(columnNames.asJava, hp.getNumBuckets, hp.getSeed) + } + val options = new AlterTableOptions() + options.addRangePartition(rangePartition) + context.syncClient.alterTable(restoreName, options) + }) + } + } + + /** + * Returns a modified DataFrame with columns adjusted to match the lastMetadata. + */ + private def adjustSchema( + df: DataFrame, + currentMetadata: TableMetadataPB, + lastMetadata: TableMetadataPB, + rowActionCol: String): DataFrame = { + log.info("Adjusting columns to handle alterations") + val idToName = lastMetadata.getColumnIdsMap.asScala.map(_.swap) + // Ignore the rowActionCol, which isn't a real column. + val currentColumns = currentMetadata.getColumnIdsMap.asScala.filter(_._1 != rowActionCol) + var result = df + // First drop all the columns that no longer exist. + // This is required to be sure a rename doesn't collide with an old column. + currentColumns.foreach { + case (colName, id) => + if (!idToName.contains(id)) { + // If the last metadata doesn't contain the id, the column is dropped. + log.info(s"Dropping the column $colName from backup data") + result = result.drop(colName) + } + } + // Then rename all the columns that were renamed in the last metadata. + currentColumns.foreach { + case (colName, id) => + if (idToName.contains(id) && idToName(id) != colName) { + // If the final name doesn't match the current name, the column is renamed. + log.info(s"Renamed the column $colName to ${idToName(id)} in backup data") + result = result.withColumnRenamed(colName, idToName(id)) + } + } + result + } + + /** + * Creates a KuduPartitioner that can be used to filter out rows for the current + * backup data which no longer apply to partitions in the last metadata. 
+ * + * In order to do this, tablet metadata are compared in the current metadata to the + * last metadata. Tablet IDs that are not in the final metadata are filtered out and + * the remaining tablet metadata is used to create a KuduPartitioner. The resulting + * KuduPartitioner can then be used to filter out rows that are no longer valid + * because those rows will fall into a non-covered range. + */ + private def createPartitionFilter( + currentMetadata: TableMetadataPB, + lastMetadata: TableMetadataPB): KuduPartitioner = { + val lastTablets = lastMetadata.getTabletsMap + val validTablets = + currentMetadata.getTabletsMap.asScala.flatMap { + case (id, pm) => + if (lastTablets.containsKey(id)) { + // Create the partition object needed for the KuduPartitioner. + val partition = new Partition( + pm.getPartitionKeyStart.toByteArray, + pm.getPartitionKeyEnd.toByteArray, + pm.getHashBucketsList) + Some((id, partition)) + } else { + // Ignore tablets that are no longer valid + None + } + } + val partitionSchema = TableMetadata.getPartitionSchema(currentMetadata) + new KuduPartitioner(partitionSchema, validTablets.asJava) + } + + def main(args: Array[String]): Unit = { + val options = RestoreOptions + .parse(args) + .getOrElse(throw new IllegalArgumentException("could not parse the arguments")) + + val session = SparkSession + .builder() + .appName("Kudu Table Restore") + .getOrCreate() + + val isRunSuccessful: Boolean = run(options, session) + if (!isRunSuccessful) { + throw new RuntimeException("Kudu Table Restore application failed!") + } + + session.stop() + } +} diff --git a/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/Options.scala b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/Options.scala new file mode 100644 index 0000000000..5201e60fd9 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/Options.scala @@ -0,0 +1,300 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
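adjustSchema above reconciles older backup data with the latest table metadata using plain DataFrame operations: dropped columns are removed first, so that a later rename cannot collide with a stale name, and renamed columns are then mapped to their final names. A minimal toy version of those two steps; the column names are invented.

[source,scala]
----
import org.apache.spark.sql.SparkSession

object AdjustSchemaSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("adjust-sketch").getOrCreate()
    import spark.implicits._

    // Pretend this is old backup data written before the table was altered.
    val df = Seq((1, "a", "b"), (2, "aa", "bb")).toDF("key", "col_a", "col_b")

    // 1) Drop columns that no longer exist in the final schema.
    // 2) Rename surviving columns to their final names.
    val adjusted = df.drop("col_b").withColumnRenamed("col_a", "col_1")

    adjusted.show()
    spark.stop()
  }
}
----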
+ +package org.apache.kudu.backup + +import java.net.InetAddress + +import org.apache.kudu.client.AsyncKuduClient +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import scopt.OptionParser + +@InterfaceAudience.Private +@InterfaceStability.Unstable +case class BackupOptions( + tables: Seq[String], + rootPath: String, + kuduMasterAddresses: String = InetAddress.getLocalHost.getCanonicalHostName, + toMs: Long = System.currentTimeMillis(), + forceFull: Boolean = BackupOptions.DefaultForceFull, + fromMs: Long = BackupOptions.DefaultFromMS, + format: String = BackupOptions.DefaultFormat, + scanBatchSize: Int = BackupOptions.DefaultScanBatchSize, + scanRequestTimeoutMs: Long = BackupOptions.DefaultScanRequestTimeoutMs, + scanLeaderOnly: Boolean = BackupOptions.DefaultScanLeaderOnly, + scanPrefetching: Boolean = BackupOptions.DefaultScanPrefetching, + keepAlivePeriodMs: Long = BackupOptions.DefaultKeepAlivePeriodMs, + failOnFirstError: Boolean = BackupOptions.DefaultFailOnFirstError, + numParallelBackups: Int = BackupOptions.DefaultNumParallelBackups, + splitSizeBytes: Option[Long] = None) + +object BackupOptions { + val DefaultForceFull: Boolean = false + val DefaultFromMS: Long = 0 + val DefaultFormat: String = "parquet" + val DefaultScanBatchSize: Int = 1024 * 1024 * 20 // 20 MiB + val DefaultScanRequestTimeoutMs: Long = + AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS // 30 seconds + val DefaultScanLeaderOnly: Boolean = false + // TODO (KUDU-1260): Add a test and enable by default? + val DefaultScanPrefetching: Boolean = false + val DefaultKeepAlivePeriodMs: Long = AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS + val DefaultFailOnFirstError: Boolean = false + val DefaultNumParallelBackups = 1 + val DefaultSplitSizeBytes: Option[Long] = None + + // We use the program name to make the help output show a the spark invocation required. + val ClassName: String = KuduBackup.getClass.getCanonicalName.dropRight(1) // Remove trailing `$` + val ProgramName: String = "spark-submit --class " + ClassName + " [spark-options] " + + "" + + val parser: OptionParser[BackupOptions] = + new OptionParser[BackupOptions](ProgramName) { + opt[String]("rootPath") + .action((v, o) => o.copy(rootPath = v)) + .text("The root path to output backup data. Accepts any Spark compatible path.") + .required() + + opt[String]("kuduMasterAddresses") + .action((v, o) => o.copy(kuduMasterAddresses = v)) + .text("Comma-separated addresses of Kudu masters. Default: localhost") + .optional() + + opt[Boolean]("forceFull") + .action((v, o) => o.copy(forceFull = v)) + .text("If true, this will be a full backup even if another full already exists. " + + "Default: " + DefaultForceFull) + .optional() + + opt[Long]("fromMs") + .action((v, o) => o.copy(fromMs = v)) + .text( + "A UNIX timestamp in milliseconds that defines the start time of an incremental " + + "backup. If unset, the fromMs will be defined by previous backups in the root " + + "directory.") + .optional() + + opt[Long]("timestampMs") + .action((v, o) => o.copy(toMs = v)) + // TODO (KUDU-2677): Document the limitations based on cluster configuration. + .text("A UNIX timestamp in milliseconds since the epoch to execute scans at. " + + "Default: `System.currentTimeMillis()`") + .optional() + + opt[Int]("scanBatchSize") + .action((v, o) => o.copy(scanBatchSize = v)) + .text("The maximum number of bytes returned by the scanner, on each batch. 
" + + "Default: " + DefaultScanBatchSize) + .optional() + + opt[Int]("scanRequestTimeoutMs") + .action((v, o) => o.copy(scanRequestTimeoutMs = v)) + .text("Sets how long in milliseconds each scan request to a server can last. " + + "Default: " + DefaultScanRequestTimeoutMs) + .optional() + + opt[Long]("keepAlivePeriodMs") + .action((v, o) => o.copy(keepAlivePeriodMs = v)) + .text("Sets the period at which to send keep-alive requests to the tablet server to " + + "ensure that scanners do not time out. Default: " + DefaultKeepAlivePeriodMs) + .optional() + + opt[Boolean]("scanLeaderOnly") + .action((v, o) => o.copy(scanLeaderOnly = v)) + .text("If true scans will only use the leader replica, otherwise scans will take place " + + "at the closest replica. Default: " + DefaultScanLeaderOnly) + .hidden() + .optional() + + opt[String]("format") + .action((v, o) => o.copy(format = v)) + .text("The file format to use when writing the data. Default: " + DefaultFormat) + .hidden() + .optional() + + opt[Unit]("scanPrefetching") + .action((_, o) => o.copy(scanPrefetching = true)) + .text("An experimental flag to enable pre-fetching data. " + + "Default: " + DefaultScanPrefetching) + .hidden() + .optional() + + opt[Unit]("failOnFirstError") + .action((_, o) => o.copy(failOnFirstError = true)) + .text("Whether to fail the backup job as soon as a single table backup fails. " + + "Default: " + DefaultFailOnFirstError) + .optional() + + opt[Int]("numParallelBackups") + .action((v, o) => o.copy(numParallelBackups = v)) + .text( + "The number of tables to back up in parallel. Backup leaves it to Spark to manage " + + "the resources of parallel jobs. Overrides --failOnFirstError. This option is " + + "experimental. Default: " + DefaultNumParallelBackups) + .hidden() + .optional() + + opt[Long]("splitSizeBytes") + .action((v, o) => o.copy(splitSizeBytes = Some(v))) + .text( + "Sets the target number of bytes per spark task. If set, tablet's primary key range " + + "will be split to generate uniform task sizes instead of the default of 1 task per " + + "tablet. This option is experimental.") + .hidden() + .optional() + + help("help").text("prints this usage text") + + arg[String]("
...") + .unbounded() + .action((v, o) => o.copy(tables = o.tables :+ v)) + .text("A list of tables to be backed up.") + } + + /** + * Parses the passed arguments into Some[KuduBackupOptions]. + * + * If the arguments are bad, an error message is displayed + * and None is returned. + * + * @param args The arguments to parse. + * @return Some[KuduBackupOptions] if parsing was successful, None if not. + */ + def parse(args: Seq[String]): Option[BackupOptions] = { + parser.parse(args, BackupOptions(Seq(), null)) + } +} + +@InterfaceAudience.Private +@InterfaceStability.Unstable +case class RestoreOptions( + tables: Seq[String], + rootPath: String, + kuduMasterAddresses: String = InetAddress.getLocalHost.getCanonicalHostName, + removeImpalaPrefix: Boolean = RestoreOptions.DefaultRemoveImpalaPrefix, + newDatabaseName: String = "", + tableSuffix: String = "", + createTables: Boolean = RestoreOptions.DefaultCreateTables, + timestampMs: Long = System.currentTimeMillis(), + failOnFirstError: Boolean = RestoreOptions.DefaultFailOnFirstError, + numParallelRestores: Int = RestoreOptions.DefaultNumParallelRestores, + restoreOwner: Boolean = RestoreOptions.DefaultRestoreOwner) + +object RestoreOptions { + val DefaultRemoveImpalaPrefix: Boolean = false + val DefaultCreateTables: Boolean = true + val DefaultFailOnFirstError = false + val DefaultNumParallelRestores = 1 + val DefaultRestoreOwner: Boolean = true + + val ClassName: String = KuduRestore.getClass.getCanonicalName.dropRight(1) // Remove trailing `$` + val ProgramName: String = "spark-submit --class " + ClassName + " [spark-options] " + + "" + + val parser: OptionParser[RestoreOptions] = + new OptionParser[RestoreOptions](ProgramName) { + opt[String]("rootPath") + .action((v, o) => o.copy(rootPath = v)) + .text("The root path to the backup data. Accepts any Spark compatible path.") + .required() + + opt[String]("kuduMasterAddresses") + .action((v, o) => o.copy(kuduMasterAddresses = v)) + .text("Comma-separated addresses of Kudu masters. Default: localhost") + .optional() + + opt[Boolean]("createTables") + .action((v, o) => o.copy(createTables = v)) + .text("If true, create the tables during restore. Set to false if the target tables " + + "already exist. Default: " + DefaultCreateTables) + .optional() + + opt[Boolean]("removeImpalaPrefix") + .action((v, o) => o.copy(removeImpalaPrefix = v)) + .text("If true, removes the \"impala::\" prefix, if present from the restored table names. This is " + + "advisable if backup was taken in a Kudu cluster without HMS sync and restoring to " + + "Kudu cluster which has HMS sync in place. Only used when createTables is true. Default: " + + DefaultRemoveImpalaPrefix) + .optional() + + opt[String]("newDatabaseName") + .action((v, o) => o.copy(newDatabaseName = v)) + .text( + "If set, replaces the existing database name and if there is no existing database name, a new database " + + "name is added. Setting this to an empty string will have the same effect of not using the flag at all. " + + "For example, if this is set to newdb for the tables testtable and impala::db.testtable the restored " + + "tables will have the names newdb.testtable and impala::newdb.testtable respectively, assuming " + + "removeImpalaPrefix is set to false") + .optional() + + opt[String]("tableSuffix") + .action((v, o) => o.copy(tableSuffix = v)) + .text("If set, the suffix to add to the restored table names. 
Only used when " + + "createTables is true.") + .optional() + + opt[Long]("timestampMs") + .action((v, o) => o.copy(timestampMs = v)) + .text("A UNIX timestamp in milliseconds that defines the latest time to use when " + + "selecting restore candidates. Default: `System.currentTimeMillis()`") + .optional() + + opt[Unit]("failOnFirstError") + .action((v, o) => o.copy(failOnFirstError = true)) + .text("Whether to fail the restore job as soon as a single table restore fails. " + + "Default: " + DefaultFailOnFirstError) + .optional() + + opt[Int]("numParallelRestores") + .action((v, o) => o.copy(numParallelRestores = v)) + .text( + "The number of tables to restore in parallel. Restore leaves it to Spark to manage " + + "the resources of parallel jobs. Overrides --failOnFirstError. This option is " + + "experimental. Default: " + DefaultNumParallelRestores) + .hidden() + .optional() + + opt[Boolean]("restoreOwner") + .action((v, o) => o.copy(restoreOwner = v)) + .text( + "If true, it restores table ownership when creating new tables, otherwise creates " + + "tables as the logged in user. Only used when createTables is true. Default: " + + DefaultRestoreOwner) + .optional() + + help("help").text("prints this usage text") + + arg[String]("
...") + .unbounded() + .action((v, o) => o.copy(tables = o.tables :+ v)) + .text("A list of tables to be restored.") + } + + /** + * Parses the passed arguments into Some[KuduRestoreOptions]. + * + * If the arguments are bad, an error message is displayed + * and None is returned. + * + * @param args The arguments to parse. + * @return Some[KuduRestoreOptions] if parsing was successful, None if not. + */ + def parse(args: Seq[String]): Option[RestoreOptions] = { + parser.parse(args, RestoreOptions(Seq(), null)) + } +} diff --git a/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/RowAction.java b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/RowAction.java new file mode 100644 index 0000000000..1d66dfc3e8 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/main/scala/org/apache/kudu/backup/RowAction.java @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.backup; + +import java.util.Map; + +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * A RowAction is used to represent the action associated with a backed up row. + * + * Currently UPSERT is the default action, while rows with the IS_DELETED virtual + * column have an action of DELETE. This value is serialized as a byte in the + * output data format to be as space efficient as possible. + * + * Given there are currently only 2 options, IS_DELETED or not, we could have used an + * IS_DELETED boolean column in the output format, but this RowAction allows for greater + * format flexibility to support INSERT or UPDATE in the future if we have full fidelity + * and sparse row backups. + * + * See {@link RowIterator} for backup side usage and {@link KuduRestore} for restore + * side usage. 
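Both option classes above are backed by plain scopt parsers, so the same argument vectors accepted on the spark-submit command line can also be parsed programmatically, which is convenient in tests. A hedged sketch; the paths and table name are examples only.

[source,scala]
----
import org.apache.kudu.backup.{BackupOptions, RestoreOptions}

object OptionsParseSketch {
  def main(args: Array[String]): Unit = {
    // parse() returns None (and prints usage) when the arguments are invalid.
    val backup = BackupOptions.parse(
      Seq("--rootPath", "hdfs:///kudu-backups", "--kuduMasterAddresses", "master1:7051", "my_table"))
    println(backup.map(o => (o.rootPath, o.tables)))

    val restore = RestoreOptions.parse(
      Seq("--rootPath", "hdfs:///kudu-backups", "--tableSuffix", "-restore", "my_table"))
    println(restore.map(o => (o.tables, o.tableSuffix)))
  }
}
----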
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public enum RowAction { + + UPSERT((byte) 0), + DELETE((byte) 1); + + /** The byte value used to represent this RowAction */ + private final byte value; + + private static Map byteRowAction; + static { + byteRowAction = new ImmutableMap.Builder() + .put(UPSERT.getValue(), UPSERT) + .put(DELETE.getValue(), DELETE) + .build(); + } + + RowAction(byte value) { + this.value = value; + } + + public byte getValue() { + return value; + } + + public static RowAction fromValue(Byte value) { + return byteRowAction.get(value); + } +} diff --git a/java-scala-spark4/kudu-backup/src/test/resources/log4j2.properties b/java-scala-spark4/kudu-backup/src/test/resources/log4j2.properties new file mode 100644 index 0000000000..e85deec999 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/test/resources/log4j2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT + +logger.kudu.name = org.apache.kudu +logger.kudu.level = debug \ No newline at end of file diff --git a/java-scala-spark4/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala b/java-scala-spark4/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala new file mode 100644 index 0000000000..cc9bcd4de1 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala @@ -0,0 +1,1105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
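RowAction above is persisted as a single byte in the backup data (0 for UPSERT, 1 for DELETE) and decoded again on restore. A quick round trip over the enum from Scala; the unmapped value in the last line only demonstrates that unknown bytes decode to null.

[source,scala]
----
import org.apache.kudu.backup.RowAction

object RowActionSketch {
  def main(args: Array[String]): Unit = {
    val encoded: Byte = RowAction.DELETE.getValue // 1
    val decoded = RowAction.fromValue(encoded)    // RowAction.DELETE
    println(s"encoded=$encoded decoded=$decoded")
    // Bytes with no mapping decode to null, so callers should validate input.
    println(RowAction.fromValue(42.toByte))
  }
}
----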
+package org.apache.kudu.backup + +import java.nio.file.Files +import java.nio.file.Path +import java.util +import com.google.common.base.Objects +import org.apache.commons.io.FileUtils +import org.apache.kudu.client.PartitionSchema.HashBucketSchema +import org.apache.kudu.client._ +import org.apache.kudu.ColumnSchema +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder +import org.apache.kudu.spark.kudu.SparkListenerUtil.withJobDescriptionCollector +import org.apache.kudu.spark.kudu.SparkListenerUtil.withJobTaskCounter +import org.apache.kudu.spark.kudu._ +import org.apache.kudu.test.CapturingLogAppender +import org.apache.kudu.test.KuduTestHarness +import org.apache.kudu.test.RandomUtils +import org.apache.kudu.test.KuduTestHarness.TabletServerConfig +import org.apache.kudu.util.DataGenerator.DataGeneratorBuilder +import org.apache.kudu.util.HybridTimeUtil +import org.apache.kudu.util.SchemaGenerator.SchemaGeneratorBuilder +import org.apache.spark.scheduler.SparkListener +import org.apache.spark.scheduler.SparkListenerJobEnd +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNotEquals +import org.junit.Assert.assertTrue +import org.junit.Assert.fail +import org.junit.After +import org.junit.Before +import org.junit.Test +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +import scala.collection.JavaConverters._ + +class TestKuduBackup extends KuduTestSuite { + val log: Logger = LoggerFactory.getLogger(getClass) + + var random: util.Random = _ + var rootDir: Path = _ + + @Before + def setUp(): Unit = { + random = RandomUtils.getRandom + rootDir = Files.createTempDirectory("backup") + } + + @After + def tearDown(): Unit = { + FileUtils.deleteDirectory(rootDir.toFile) + } + + @Test + def testSimpleBackupAndRestore() { + val rowCount = 100 + insertRows(table, rowCount) // Insert data into the default test table. + + // backup and restore. + backupAndValidateTable(tableName, rowCount, false) + restoreAndValidateTable(tableName, rowCount) + + // Validate the table schemas match. + validateTablesMatch(tableName, s"$tableName-restore") + } + + @Test + def testAutoIncrementingColumnBackupAndRestore() { + val rowCount = 100 + val expectedRowCount = 200 + val simpleAutoIncrementingTableOptions = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1) + val AutoIncrementingTable = kuduClient.createTable( + simpleAutoIncrementingTableName, + simpleAutoIncrementingSchema, + simpleAutoIncrementingTableOptions) + + val session = kuduClient.newSession() + + // Insert some rows. + Range(0, rowCount).foreach { i => + val insert = AutoIncrementingTable.newInsert + val row = insert.getRow + row.addInt("key", i) + row.addString("val", s"a$i") + session.apply(insert) + } + + // Perform a full backup. + backupAndValidateTable(simpleAutoIncrementingTableName, rowCount, false) + // Insert some more rows. + Range(rowCount, 2 * rowCount).foreach { i => + val insert = AutoIncrementingTable.newInsert + val row = insert.getRow + row.addInt("key", i) + row.addString("val", s"a$i") + session.apply(insert) + } + // Perform an incremental backup. + backupAndValidateTable(simpleAutoIncrementingTableName, rowCount, true) + + // Restore the table. + restoreAndValidateTable(simpleAutoIncrementingTableName, expectedRowCount) + // Validate the table schemas match. 
+ validateTablesMatch( + simpleAutoIncrementingTableName, + s"$simpleAutoIncrementingTableName-restore") + + // Validate the data written in the restored table. + val restoreTable = kuduClient.openTable(s"$simpleAutoIncrementingTableName-restore") + val scanner = kuduClient.newScannerBuilder(restoreTable).build() + val rows = scanner.asScala.toList + assertEquals(expectedRowCount, rows.length) + var i = 0 + rows.foreach { row => + assertEquals(i, row.getInt("key")) + assertEquals(i + 1, row.getLong(Schema.getAutoIncrementingColumnName)) + assertEquals(s"a$i", row.getString("val")) + i += 1 + } + assertEquals(expectedRowCount, rows.length) + } + + @Test + def testSimpleIncrementalBackupAndRestore() { + insertRows(table, 100) // Insert data into the default test table. + + // Run and validate initial backup. + backupAndValidateTable(tableName, 100, false) + + // Insert more rows and validate incremental backup. + insertRows(table, 100, 100) // Insert more data. + backupAndValidateTable(tableName, 100, true) + + // Delete rows that span the full and incremental backup and validate incremental backup. + Range(50, 150).foreach(deleteRow) + backupAndValidateTable(tableName, 100, true) + + // Restore the backups and validate the end result. + restoreAndValidateTable(tableName, 100) + } + + @Test + def testBackupAndRestoreJobNames() { + val rowCount = 100 + insertRows(table, rowCount) // Insert data into the default test table. + + // Backup the table and verify the job description. + val fullDesc = withJobDescriptionCollector(ss.sparkContext) { () => + runBackup(createBackupOptions(Seq(table.getName))) + } + assertEquals(1, fullDesc.size) + assertEquals("Kudu Backup(full): test", fullDesc.head) + + // Backup again and verify the job description. + val incDesc = withJobDescriptionCollector(ss.sparkContext) { () => + runBackup(createBackupOptions(Seq(table.getName))) + } + assertEquals(1, incDesc.size) + assertEquals("Kudu Backup(incremental): test", incDesc.head) + + // Restore the table and verify the job descriptions. + val restoreDesc = withJobDescriptionCollector(ss.sparkContext) { () => + runRestore(createRestoreOptions(Seq(table.getName))) + } + assertEquals(2, restoreDesc.size) + assertTrue(restoreDesc.contains("Kudu Restore(1/2): test")) + assertTrue(restoreDesc.contains("Kudu Restore(2/2): test")) + } + + @Test + def testBackupAndRestoreWithNoRows(): Unit = { + backupAndValidateTable(tableName, 0, false) + backupAndValidateTable(tableName, 0, true) + restoreAndValidateTable(tableName, 0) + validateTablesMatch(tableName, s"$tableName-restore") + } + + @Test + def testBackupMissingTable(): Unit = { + // Check that a backup of a missing table fails fast with an exception if the + // fail-on-first-error option is set. + try { + val failFastOptions = createBackupOptions(Seq("missingTable")).copy(failOnFirstError = true) + runBackup(failFastOptions) + fail() + } catch { + case e: KuduException => assertTrue(e.getMessage.contains("the table does not exist")) + } + + // Check that a backup of a missing table does not fail fast or throw an exception with the + // default setting to not fail on individual table errors. The failure is indicated by the + // return value. + val options = createBackupOptions(Seq("missingTable")) + assertFalse(runBackup(options)) + } + + @Test + def testFailedTableBackupDoesNotFailOtherTableBackups() { + insertRows(table, 100) // Insert data into the default test table. + + // Run a fail-fast backup. It should fail because the first table doesn't exist. 
+ // There's no guarantee about the order backups run in, so the table that does exist may or may + // not have been backed up. + val failFastOptions = + createBackupOptions(Seq("missingTable", tableName)).copy(failOnFirstError = true) + try { + KuduBackup.run(failFastOptions, ss) + fail() + } catch { + case e: KuduException => assertTrue(e.getMessage.contains("the table does not exist")) + } + + // Run a backup with the default setting to not fail on table errors. It should back up the + // table that does exist, it should not throw an exception, and it should return 1 to indicate + // some error. The logs should contain a message about the missing table. + val options = createBackupOptions(Seq("missingTable", tableName)) + val logs = captureLogs(() => assertFalse(KuduBackup.run(options, ss))) + assertTrue(logs.contains("the table does not exist")) + + // Restore the backup of the non-failed table and validate the end result. + restoreAndValidateTable(tableName, 100) + } + + @Test + def testRestoreWithNoBackup(): Unit = { + // Check that a restore of a table with no backups fails fast with an exception if the + // fail-on-first-error option is set. + val failFastOptions = createRestoreOptions(Seq(tableName)).copy(failOnFirstError = true) + try { + assertFalse(runRestore(failFastOptions)) + fail() + } catch { + case e: RuntimeException => + assertEquals(e.getMessage, s"No valid backups found for table: $tableName") + } + + // Check that a restore of a table with no backups does not fail fast or throw an exception if + // default no-fail-fast option is set. + assertFalse(runRestore(createRestoreOptions(Seq("missingTable")))) + } + + @Test + def testFailedTableRestoreDoesNotFailOtherTableRestores() { + insertRows(table, 100) + KuduBackup.run(createBackupOptions(Seq(tableName)), ss) + + // There's no guarantee about the order restores run in, so it doesn't work to test fail-fast + // and then the default no-fail-fast because the actual table may have been restored. + val logs = captureLogs( + () => assertFalse(runRestore(createRestoreOptions(Seq("missingTable", tableName))))) + assertTrue(logs.contains("Failed to restore table")) + } + + @Test + def testForceIncrementalBackup() { + insertRows(table, 100) // Insert data into the default test table. + Thread.sleep(1) // Ensure the previous insert is before beforeMs. + // Set beforeMs so we can force an incremental at this time. + val beforeMs = System.currentTimeMillis() + Thread.sleep(1) // Ensure the next insert is after beforeMs. + insertRows(table, 100, 100) // Insert more data. + + // Force an incremental backup without a full backup. + // It will use a diff scan and won't check the existing dependency graph. + val options = createBackupOptions(Seq(tableName), fromMs = beforeMs) + val logs = captureLogs { () => + assertTrue(runBackup(options)) + } + assertTrue(logs.contains("Performing an incremental backup: fromMs was set to")) + validateBackup(options, 100, true) + } + + @Test + def testForceFullBackup() { + insertRows(table, 100) // Insert data into the default test table. + // Backup the table so the following backup should be an incremental. + backupAndValidateTable(tableName, 100) + insertRows(table, 100, 100) // Insert more data. + + // Force a full backup. It should contain all the rows. 
+ val options = createBackupOptions(Seq(tableName), forceFull = true) + val logs = captureLogs { () => + assertTrue(runBackup(options)) + } + assertTrue(logs.contains("Performing a full backup: forceFull was set to true")) + validateBackup(options, 200, false) + } + + @Test + def testSimpleBackupAndRestoreWithSpecialCharacters() { + // Use an Impala-style table name to verify url encoding/decoding of the table name works. + val impalaTableName = "impala::default.test" + + val tableOptions = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1) + + kuduClient.createTable(impalaTableName, simpleSchema, tableOptions) + + backupAndValidateTable(tableName, 0) + restoreAndValidateTable(tableName, 0) + } + + @Test + def testRandomBackupAndRestore() { + val table = createRandomTable() + val tableName = table.getName + val maxRows = 200 + loadRandomData(table) + + // Run a full backup. Note that, here and below, we do not validate the backups against the + // generated rows. There may be duplicates in the generated rows. Instead, the restored table is + // checked against the original table, which is less exhaustive but still a good test. + val options = createBackupOptions(Seq(tableName)) + assertTrue(runBackup(options)) + + // Run 1 to 5 incremental backups. + val incrementalCount = random.nextInt(5) + 1 + (0 to incrementalCount).foreach { i => + loadRandomData(table, maxRows) + val incOptions = createBackupOptions(Seq(tableName)) + assertTrue(runBackup(incOptions)) + } + + assertTrue(runRestore(createRestoreOptions(Seq(tableName)))) + validateTablesMatch(tableName, s"$tableName-restore") + } + + @Test + def testBackupAndRestoreMultipleTables() { + val numRows = 1 + val table1Name = "table1" + val table2Name = "table2" + + val table1 = kuduClient.createTable(table1Name, schema, tableOptions) + val table2 = kuduClient.createTable(table2Name, schema, tableOptions) + + insertRows(table1, numRows) + insertRows(table2, numRows) + + assertTrue(runBackup(createBackupOptions(Seq(table1Name, table2Name)))) + assertTrue(runRestore(createRestoreOptions(Seq(table1Name, table2Name)))) + + val rdd1 = kuduContext.kuduRDD(ss.sparkContext, s"$table1Name-restore", List("key")) + assertEquals(numRows, rdd1.count()) + + val rdd2 = kuduContext.kuduRDD(ss.sparkContext, s"$table2Name-restore", List("key")) + assertEquals(numRows, rdd2.count()) + } + + @Test + def testParallelBackupAndRestore() { + val numRows = 1 + val tableNames = Range(0, 10).map { i => + val tableName = s"table$i" + val table = kuduClient.createTable(tableName, schema, tableOptions) + insertRows(table, numRows) + tableName + } + + assertTrue(runBackup(createBackupOptions(tableNames).copy(numParallelBackups = 3))) + assertTrue(runRestore(createRestoreOptions(tableNames).copy(numParallelRestores = 4))) + + tableNames.foreach { tableName => + val rdd = kuduContext.kuduRDD(ss.sparkContext, s"$tableName-restore", List("key")) + assertEquals(numRows, rdd.count()) + } + } + + @TabletServerConfig( + flags = Array( + "--flush_threshold_mb=1", + "--flush_threshold_secs=1", + // Disable rowset compact to prevent DRSs being merged because they are too small. + "--enable_rowset_compaction=false" + )) + @Test + def testBackupWithSplitSizeBytes() { + // Create a table with a single partition. 
+ val tableName = "split-size-table" + val options = new CreateTableOptions().setRangePartitionColumns(List("key").asJava) + val table = kuduClient.createTable(tableName, schema, options) + + // Insert enough data into the test table so we can split it. + val rowCount = 1000 + upsertRowsWithRowDataSize(table, rowCount, 32 * 1024) + + // Wait for mrs flushed. + Thread.sleep(5 * 1000) + + // Run a backup job with custom splitSizeBytes and count the tasks. + val backupOptions = createBackupOptions(Seq(tableName)).copy(splitSizeBytes = Some(1024)) + val actualNumTasks = withJobTaskCounter(ss.sparkContext) { () => + assertTrue(runBackup(backupOptions)) + } + validateBackup(backupOptions, rowCount, false) + + // Verify there were more tasks than there are partitions. + assertTrue(actualNumTasks > 1) + } + + @Test + def testBackupAndRestoreTableWithManyPartitions(): Unit = { + val kNumPartitions = 100 + val tableName = "many-partitions-table" + + val options = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1) + + // Add one range partition and create the table. Separate the range partition + // from the ones added later so there's a bounded, non-covered range. + val initialLower = schema.newPartialRow() + initialLower.addInt("key", -5) + val initialUpper = schema.newPartialRow() + initialUpper.addInt("key", -4) + options.addRangePartition(initialLower, initialUpper) + val table = kuduClient.createTable(tableName, schema, options) + + // Add the rest of the partitions via alter. + for (i <- 0 to kNumPartitions) { + val alterOptions = new AlterTableOptions() + val lower = schema.newPartialRow() + lower.addInt("key", i) + val upper = schema.newPartialRow() + upper.addInt("key", i + 1) + alterOptions.addRangePartition(lower, upper) + kuduClient.alterTable(tableName, alterOptions) + } + + // Insert some rows. Note that each row will go into a different range + // partition, and the initial partition will be empty. + insertRows(table, kNumPartitions) + + // Now backup and restore the table. + backupAndValidateTable(tableName, kNumPartitions) + restoreAndValidateTable(tableName, kNumPartitions) + } + + @Test + def testBackupAndRestoreTableWithNoRangePartitions(): Unit = { + val tableName = "only-hash-partitions-table" + + val options = new CreateTableOptions() + .addHashPartitions(List("key").asJava, 2) + .setNumReplicas(1) + val table1 = kuduClient.createTable(tableName, schema, options) + + val rowCount = 100 + insertRows(table1, rowCount) + + backupAndValidateTable(tableName, rowCount) + restoreAndValidateTable(tableName, rowCount) + } + + @Test + def testBackupAndRestoreNoRestoreOwner(): Unit = { + val rowCount = 100 + insertRows(table, rowCount) + + backupAndValidateTable(tableName, rowCount, false) + assertTrue(runRestore(createRestoreOptions(Seq(tableName)).copy(restoreOwner = false))) + validateTablesMatch(tableName, s"$tableName-restore", false) + } + + @Test + def testColumnAlterHandling(): Unit = { + // Create a basic table. 
+ val tableName = "testColumnAlterHandling" + val columns = List( + new ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchemaBuilder("col_a", Type.STRING).build(), + new ColumnSchemaBuilder("col_b", Type.STRING).build(), + new ColumnSchemaBuilder("col_c", Type.STRING).build(), + new ColumnSchemaBuilder("col_d", Type.STRING).build() + ) + val schema = new Schema(columns.asJava) + val options = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + var table = kuduClient.createTable(tableName, schema, options) + val session = kuduClient.newSession() + + // Insert some rows and take a full backup. + Range(0, 10).foreach { i => + val insert = table.newInsert + val row = insert.getRow + row.addInt("key", i) + row.addString("col_a", s"a$i") + row.addString("col_b", s"b$i") + row.addString("col_c", s"c$i") + row.addString("col_d", s"d$i") + session.apply(insert) + } + backupAndValidateTable(tableName, 10, false) + + // Rename col_a to col_1 and add a new col_a to ensure the column id's and defaults + // work correctly. Also drop col_d and rename col_c to ensure collisions on renaming + // columns don't occur when processing columns from left to right. + kuduClient.alterTable( + tableName, + new AlterTableOptions() + .renameColumn("col_a", "col_1") + .addColumn(new ColumnSchemaBuilder("col_a", Type.STRING) + .defaultValue("default") + .build()) + .dropColumn("col_b") + .dropColumn("col_d") + .renameColumn("col_c", "col_d") + ) + + // Insert more rows and take an incremental backup + table = kuduClient.openTable(tableName) + Range(10, 20).foreach { i => + val insert = table.newInsert + val row = insert.getRow + row.addInt("key", i) + row.addString("col_1", s"a$i") + row.addString("col_d", s"c$i") + session.apply(insert) + } + backupAndValidateTable(tableName, 10, true) + + // Restore the table and validate. + assertTrue(runRestore(createRestoreOptions(Seq(tableName)))) + + val restoreTable = kuduClient.openTable(s"$tableName-restore") + val scanner = kuduClient.newScannerBuilder(restoreTable).build() + val rows = scanner.asScala.toSeq + + // Validate there are still 20 rows. + assertEquals(20, rows.length) + // Validate col_b is dropped from all rows. + assertTrue(rows.forall(!_.getSchema.hasColumn("col_b"))) + // Validate the existing and renamed columns have the expected set of values. + val expectedSet = Range(0, 20).toSet + assertEquals(expectedSet, rows.map(_.getInt("key")).toSet) + assertEquals(expectedSet.map(i => s"a$i"), rows.map(_.getString("col_1")).toSet) + assertEquals(expectedSet.map(i => s"c$i"), rows.map(_.getString("col_d")).toSet) + // Validate the new col_a has all defaults. + assertTrue(rows.forall(_.getString("col_a") == "default")) + } + + @Test + def testPartitionAlterHandling(): Unit = { + // Create a basic table with 10 row range partitions covering 10 through 40. + val tableName = "testColumnAlterHandling" + val ten = createPartitionRow(10) + val twenty = createPartitionRow(20) + val thirty = createPartitionRow(30) + val fourty = createPartitionRow(40) + val options = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .addRangePartition(ten, twenty) + .addRangePartition(twenty, thirty) + .addRangePartition(thirty, fourty) + val table = kuduClient.createTable(tableName, schema, options) + + // Fill the partitions with rows. + insertRows(table, 30, 10) + + // Run a full backup on the table. 
+ backupAndValidateTable(tableName, 30, false) + + // Drop partition 10-20, drop and re-add partition 20-30, add partition 0-10 and 40-50. + // (drops 20 total rows) + val zero = createPartitionRow(0) + val fifty = createPartitionRow(50) + kuduClient.alterTable( + tableName, + new AlterTableOptions() + .dropRangePartition(ten, twenty) + .dropRangePartition(twenty, thirty) + .addRangePartition(twenty, thirty) + .addRangePartition(zero, ten) + .addRangePartition(fourty, fifty) + ) + + // Add some rows back to the new partitions (adds 15 total rows) + insertRows(table, 5, 0) + insertRows(table, 5, 20) + insertRows(table, 5, 40) + + // Run an incremental backup on the table. + backupAndValidateTable(tableName, 15, true) + + // Restore the table and validate. + assertTrue(runRestore(createRestoreOptions(Seq(tableName)))) + + val restoreTable = kuduClient.openTable(s"$tableName-restore") + val scanner = kuduClient.newScannerBuilder(restoreTable).build() + val rows = scanner.asScala.toList.map(_.getInt("key")).sorted + val expectedKeys = + (Range(0, 5) ++ Range(20, 25) ++ Range(30, 40) ++ Range(40, 45)).toList.sorted + + assertEquals(25, rows.length) + assertEquals(expectedKeys, rows) + } + + @Test + def testTableAlterHandling(): Unit = { + // Create the initial table and load it with data. + val tableName = "testTableAlterHandling" + var table = kuduClient.createTable(tableName, schema, tableOptions) + insertRows(table, 100) + + // Run and validate initial backup. + backupAndValidateTable(tableName, 100, false) + + // Rename the table and insert more rows + val newTableName = "impala::default.testTableAlterHandling" + kuduClient.alterTable(tableName, new AlterTableOptions().renameTable(newTableName)) + table = kuduClient.openTable(newTableName) + insertRows(table, 100, 100) + + // Run and validate an incremental backup. + backupAndValidateTable(newTableName, 100, true) + + // Create a new table with the old name. + val tableWithOldName = kuduClient.createTable(tableName, schema, tableOptions) + insertRows(tableWithOldName, 50) + + // Backup the table with the old name. + backupAndValidateTable(tableName, 50, false) + + // Restore the tables and check the row counts. + restoreAndValidateTable(newTableName, 200) + restoreAndValidateTable(tableName, 50) + } + + @Test + def testTableWithOnlyCustomHashSchemas(): Unit = { + // Create the initial table and load it with data. + val tableName = "testTableWithOnlyCustomHashSchemas" + val table = kuduClient.createTable(tableName, schema, tableOptionsWithCustomHashSchema) + insertRows(table, 100) + + // Run and validate initial backup. + backupAndValidateTable(tableName, 100, false) + + // Insert rows then run and validate an incremental backup. + insertRows(table, 100, 100) + backupAndValidateTable(tableName, 100, true) + + // Restore the table and check the row count. + restoreAndValidateTable(tableName, 200) + + // Check the range bounds and the hash schema of each range of the restored table. + val restoredTable = kuduClient.openTable(s"$tableName-restore") + assertEquals( + "[0 <= VALUES < 100 HASH(key) PARTITIONS 2, " + + "100 <= VALUES < 200 HASH(key) PARTITIONS 3]", + restoredTable.getFormattedRangePartitionsWithHashSchema(10000).toString + ) + } + + @Test + def testTableWithTableAndCustomHashSchemas(): Unit = { + // Create the initial table and load it with data. 
+ val tableName = "testTableWithTableAndCustomHashSchemas" + val table = kuduClient.createTable(tableName, schema, tableOptionsWithTableAndCustomHashSchema) + insertRows(table, 100) + + // Run and validate initial backup. + backupAndValidateTable(tableName, 100, false) + + // Insert rows then run and validate an incremental backup. + insertRows(table, 200, 100) + backupAndValidateTable(tableName, 200, true) + + // Restore the table and check the row count. + restoreAndValidateTable(tableName, 300) + + // Check the range bounds and the hash schema of each range of the restored table. + val restoredTable = kuduClient.openTable(s"$tableName-restore") + assertEquals( + "[0 <= VALUES < 100 HASH(key) PARTITIONS 2, " + + "100 <= VALUES < 200 HASH(key) PARTITIONS 3, " + + "200 <= VALUES < 300 HASH(key) PARTITIONS 4]", + restoredTable.getFormattedRangePartitionsWithHashSchema(10000).toString + ) + } + + @Test + def testTableAlterWithTableAndCustomHashSchemas(): Unit = { + // Create the initial table and load it with data. + val tableName = "testTableAlterWithTableAndCustomHashSchemas" + var table = kuduClient.createTable(tableName, schema, tableOptionsWithTableAndCustomHashSchema) + insertRows(table, 100) + + // Run and validate initial backup. + backupAndValidateTable(tableName, 100, false) + + // Insert rows then run and validate an incremental backup. + insertRows(table, 200, 100) + backupAndValidateTable(tableName, 200, true) + + // Drops range partition with table wide hash schema and re-adds same range partition with + // custom hash schema, also adds another range partition with custom hash schema through alter. + val twoHundred = createPartitionRow(200) + val threeHundred = createPartitionRow(300) + val fourHundred = createPartitionRow(400) + val newPartition = new RangePartitionWithCustomHashSchema( + twoHundred, + threeHundred, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + newPartition.addHashPartitions(List("key").asJava, 5, 0) + val newPartition1 = new RangePartitionWithCustomHashSchema( + threeHundred, + fourHundred, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + newPartition1.addHashPartitions(List("key").asJava, 6, 0) + kuduClient.alterTable( + tableName, + new AlterTableOptions() + .dropRangePartition(twoHundred, threeHundred) + .addRangePartition(newPartition) + .addRangePartition(newPartition1)) + + // TODO: Avoid this table refresh by updating partition schema after alter table calls. + // See https://issues.apache.org/jira/browse/KUDU-3388 for more details. + table = kuduClient.openTable(tableName) + + // Insert rows then run and validate an incremental backup. + insertRows(table, 100, 300) + backupAndValidateTable(tableName, 100, true) + + // Restore the table and validate. + assertTrue(runRestore(createRestoreOptions(Seq(tableName)))) + + // Check the range bounds and the hash schema of each range of the restored table. 
+ val restoredTable = kuduClient.openTable(s"$tableName-restore") + assertEquals( + "[0 <= VALUES < 100 HASH(key) PARTITIONS 2, " + + "100 <= VALUES < 200 HASH(key) PARTITIONS 3, " + + "200 <= VALUES < 300 HASH(key) PARTITIONS 5, " + + "300 <= VALUES < 400 HASH(key) PARTITIONS 6]", + restoredTable.getFormattedRangePartitionsWithHashSchema(10000).toString + ) + } + + @Test + def testTableNameChangeFlags() { + // Create four tables and load data + val rowCount = 100 + val tableNameWithImpalaPrefix = "impala::oldDatabase.testTableWithImpalaPrefix" + val tableWithImpalaPrefix = + kuduClient.createTable(tableNameWithImpalaPrefix, schema, tableOptions) + val tableNameWithoutImpalaPrefix = "oldDatabase.testTableWithoutImpalaPrefix" + val tableWithoutImpalaPrefix = + kuduClient.createTable(tableNameWithoutImpalaPrefix, schema, tableOptions) + val tableNameWithImpalaPrefixWithoutDb = "impala::testTableWithImpalaPrefixWithoutDb" + val tableWithImpalaPrefixWithoutDb = + kuduClient.createTable(tableNameWithImpalaPrefixWithoutDb, schema, tableOptions) + val tableNameWithoutImpalaPrefixWithoutDb = "testTableWithoutImpalaPrefixWithoutDb" + val tableWithoutImpalaPrefixWithoutDb = + kuduClient.createTable(tableNameWithoutImpalaPrefixWithoutDb, schema, tableOptions) + insertRows(tableWithImpalaPrefix, rowCount) + insertRows(tableWithoutImpalaPrefix, rowCount) + insertRows(tableWithImpalaPrefixWithoutDb, rowCount) + insertRows(tableWithoutImpalaPrefixWithoutDb, rowCount) + + // Backup the four tables + backupAndValidateTable(tableNameWithImpalaPrefix, rowCount, false) + backupAndValidateTable(tableNameWithoutImpalaPrefix, rowCount, false) + backupAndValidateTable(tableNameWithImpalaPrefixWithoutDb, rowCount, false) + backupAndValidateTable(tableNameWithoutImpalaPrefixWithoutDb, rowCount, false) + + // Restore with removeImpalaPrefix = true and newDatabase = newDatabase and validate the tables + val withImpalaPrefix = + createRestoreOptions(Seq(tableNameWithImpalaPrefix)) + .copy(removeImpalaPrefix = true, newDatabaseName = "newDatabase") + assertTrue(runRestore(withImpalaPrefix)) + val rddWithImpalaPrefix = + kuduContext + .kuduRDD(ss.sparkContext, s"newDatabase.testTableWithImpalaPrefix-restore") + assertEquals(rddWithImpalaPrefix.collect.length, rowCount) + + val withoutImpalaPrefix = + createRestoreOptions(Seq(tableNameWithoutImpalaPrefix)) + .copy(removeImpalaPrefix = true, newDatabaseName = "newDatabase") + assertTrue(runRestore(withoutImpalaPrefix)) + val rddWithoutImpalaPrefix = + kuduContext.kuduRDD(ss.sparkContext, s"newDatabase.testTableWithoutImpalaPrefix-restore") + assertEquals(rddWithoutImpalaPrefix.collect.length, rowCount) + + val withImpalaPrefixWithoutDb = + createRestoreOptions(Seq(tableNameWithImpalaPrefixWithoutDb)) + .copy(removeImpalaPrefix = true, newDatabaseName = "newDatabase") + assertTrue(runRestore(withImpalaPrefixWithoutDb)) + val rddWithImpalaPrefixWithoutDb = + kuduContext + .kuduRDD(ss.sparkContext, s"newDatabase.testTableWithImpalaPrefixWithoutDb-restore") + assertEquals(rddWithImpalaPrefixWithoutDb.collect.length, rowCount) + + val withoutImpalaPrefixWithoutDb = + createRestoreOptions(Seq(tableNameWithoutImpalaPrefixWithoutDb)) + .copy(removeImpalaPrefix = true, newDatabaseName = "newDatabase") + assertTrue(runRestore(withoutImpalaPrefixWithoutDb)) + val rddWithoutImpalaPrefixWithoutDb = + kuduContext + .kuduRDD(ss.sparkContext, s"newDatabase.testTableWithoutImpalaPrefixWithoutDb-restore") + assertEquals(rddWithoutImpalaPrefixWithoutDb.collect.length, rowCount) + } + + 
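+  // To summarize the renaming exercised by testTableNameChangeFlags above (a sketch derived from
+  // its assertions, assuming removeImpalaPrefix = true, newDatabaseName = "newDatabase", and the
+  // default "-restore" suffix):
+  //   "impala::oldDatabase.testTableWithImpalaPrefix" -> "newDatabase.testTableWithImpalaPrefix-restore"
+  //   "oldDatabase.testTableWithoutImpalaPrefix"      -> "newDatabase.testTableWithoutImpalaPrefix-restore"
+  //   "impala::testTableWithImpalaPrefixWithoutDb"    -> "newDatabase.testTableWithImpalaPrefixWithoutDb-restore"
+  //   "testTableWithoutImpalaPrefixWithoutDb"         -> "newDatabase.testTableWithoutImpalaPrefixWithoutDb-restore"
+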
@Test + def testDeleteIgnore(): Unit = { + doDeleteIgnoreTest() + } + + /** + * Identical to the above test, but exercising the old session based delete ignore operations, + * ensuring we functionally support the same semantics. + */ + @Test + @KuduTestHarness.MasterServerConfig(flags = Array("--master_support_ignore_operations=false")) + def testLegacyDeleteIgnore(): Unit = { + doDeleteIgnoreTest() + } + + def doDeleteIgnoreTest(): Unit = { + insertRows(table, 100) // Insert data into the default test table. + + // Run and validate initial backup. + backupAndValidateTable(tableName, 100, false) + + // Delete the rows and validate incremental backup. + Range(0, 100).foreach(deleteRow) + backupAndValidateTable(tableName, 100, true) + + // When restoring the table, delete half the rows after each job completes. + // This will force delete rows to cause NotFound errors and allow validation + // that they are correctly handled. + val listener = new SparkListener { + override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { + val client = kuduContext.syncClient + val table = client.openTable(s"$tableName-restore") + val scanner = kuduContext.syncClient.newScannerBuilder(table).build() + val session = client.newSession() + scanner.asScala.foreach { rr => + if (rr.getInt("key") % 2 == 0) { + val delete = table.newDelete() + val row = delete.getRow + row.addInt("key", rr.getInt("key")) + session.apply(delete) + } + } + } + } + ss.sparkContext.addSparkListener(listener) + + restoreAndValidateTable(tableName, 0) + } + + def createPartitionRow(value: Int): PartialRow = { + val row = schema.newPartialRow() + row.addInt("key", value) + row + } + + def createRandomTable(): KuduTable = { + val columnCount = random.nextInt(50) + 1 // At least one column. + val keyColumnCount = random.nextInt(columnCount) + 1 // At least one key. + val schemaGenerator = new SchemaGeneratorBuilder() + .random(random) + .columnCount(columnCount) + .keyColumnCount(keyColumnCount) + .build() + val schema = schemaGenerator.randomSchema() + val options = schemaGenerator.randomCreateTableOptions(schema) + options.setNumReplicas(1) + val name = s"random-${System.currentTimeMillis()}" + kuduClient.createTable(name, schema, options) + } + + def loadRandomData(table: KuduTable, maxRows: Int = 200): IndexedSeq[PartialRow] = { + val kuduSession = kuduClient.newSession() + val dataGenerator = new DataGeneratorBuilder() + .random(random) + .build() + val rowCount = random.nextInt(maxRows) + (0 to rowCount).map { i => + val upsert = table.newUpsert() + val row = upsert.getRow + dataGenerator.randomizeRow(row) + kuduSession.apply(upsert) + row + } + } + + /** + * A convenience method to create backup options for tests. + * + * We add one millisecond to our target snapshot time (toMs). This will ensure we read all of + * the records in the backup and prevent flaky off-by-one errors. The underlying reason for + * adding 1 ms is that we pass the timestamp in millisecond granularity but the snapshot time + * has microsecond granularity. This means if the test runs fast enough that data is inserted + * with the same millisecond value as nowMs (after truncating the micros) the records inserted + * in the microseconds after truncation could be unread. 
+ */ + def createBackupOptions( + tableNames: Seq[String], + toMs: Long = System.currentTimeMillis() + 1, + fromMs: Long = BackupOptions.DefaultFromMS, + forceFull: Boolean = false): BackupOptions = { + BackupOptions( + rootPath = rootDir.toUri.toString, + tables = tableNames, + kuduMasterAddresses = harness.getMasterAddressesAsString, + fromMs = fromMs, + toMs = toMs, + forceFull = forceFull + ) + } + + /** + * A convenience method to create backup options for tests. + */ + def createRestoreOptions( + tableNames: Seq[String], + tableSuffix: String = "-restore"): RestoreOptions = { + RestoreOptions( + rootPath = rootDir.toUri.toString, + tables = tableNames, + kuduMasterAddresses = harness.getMasterAddressesAsString, + tableSuffix = tableSuffix + ) + } + + def backupAndValidateTable( + tableName: String, + expectedRowCount: Long, + expectIncremental: Boolean = false) = { + val options = createBackupOptions(Seq(tableName)) + // Run the backup. + assertTrue(runBackup(options)) + validateBackup(options, expectedRowCount, expectIncremental) + } + + def runBackup(options: BackupOptions): Boolean = { + // Log the timestamps to simplify flaky debugging. + log.info(s"nowMs: ${System.currentTimeMillis()}") + val hts = HybridTimeUtil.HTTimestampToPhysicalAndLogical(kuduClient.getLastPropagatedTimestamp) + log.info(s"propagated physicalMicros: ${hts(0)}") + log.info(s"propagated logical: ${hts(1)}") + KuduBackup.run(options, ss) + } + + def validateBackup( + options: BackupOptions, + expectedRowCount: Long, + expectIncremental: Boolean): Unit = { + val io = new BackupIO(ss.sparkContext.hadoopConfiguration, options.rootPath) + val tableName = options.tables.head + val table = harness.getClient.openTable(tableName) + val backupPath = io.backupPath(table.getTableId, table.getName, options.toMs) + val metadataPath = io.backupMetadataPath(backupPath) + val metadata = io.readTableMetadata(metadataPath) + + // Verify the backup type. + if (expectIncremental) { + assertNotEquals(metadata.getFromMs, 0) + } else { + assertEquals(metadata.getFromMs, 0) + } + + // Verify the output data. 
+ val schema = BackupUtils.dataSchema(table.getSchema, expectIncremental) + val df = ss.sqlContext.read + .format(metadata.getDataFormat) + .schema(schema) + .load(backupPath.toString) + assertEquals(expectedRowCount, df.collect.length) + } + + def restoreAndValidateTable(tableName: String, expectedRowCount: Long) = { + val options = createRestoreOptions(Seq(tableName)) + assertTrue(runRestore(options)) + val restoreTableName = KuduRestore.getRestoreTableName(tableName, options) + val rdd = kuduContext.kuduRDD(ss.sparkContext, s"$restoreTableName") + assertEquals(rdd.collect.length, expectedRowCount) + } + + def runRestore(options: RestoreOptions): Boolean = { + KuduRestore.run(options, ss) + } + + def validateTablesMatch(tableA: String, tableB: String, ownersMatch: Boolean = true): Unit = { + val tA = kuduClient.openTable(tableA) + val tB = kuduClient.openTable(tableB) + if (ownersMatch) { + assertEquals(tA.getOwner, tB.getOwner) + } else { + assertNotEquals(tA.getOwner, tB.getOwner) + } + assertNotEquals("", tA.getOwner); + assertEquals(tA.getComment, tB.getComment) + assertEquals(tA.getNumReplicas, tB.getNumReplicas) + assertTrue(schemasMatch(tA.getSchema, tB.getSchema)) + assertTrue(partitionSchemasMatch(tA.getPartitionSchema, tB.getPartitionSchema)) + } + + def schemasMatch(before: Schema, after: Schema): Boolean = { + if (before eq after) return true + if (before.getColumns.size != after.getColumns.size) return false + (0 until before.getColumns.size).forall { i => + columnsMatch(before.getColumnByIndex(i), after.getColumnByIndex(i)) + } + } + + def columnsMatch(before: ColumnSchema, after: ColumnSchema): Boolean = { + if (before eq after) return true + Objects.equal(before.getName, after.getName) && + Objects.equal(before.getType, after.getType) && + Objects.equal(before.isKey, after.isKey) && + Objects.equal(before.isNullable, after.isNullable) && + defaultValuesMatch(before.getDefaultValue, after.getDefaultValue) && + Objects.equal(before.getDesiredBlockSize, after.getDesiredBlockSize) && + Objects.equal(before.getEncoding, after.getEncoding) && + Objects + .equal(before.getCompressionAlgorithm, after.getCompressionAlgorithm) && + Objects.equal(before.getTypeAttributes, after.getTypeAttributes) + Objects.equal(before.getComment, after.getComment) + } + + // Special handling because default values can be a byte array which is not + // handled by Guava's Objects.equals. 
+ // See https://github.com/google/guava/issues/1425 + def defaultValuesMatch(before: Any, after: Any): Boolean = { + if (before.isInstanceOf[Array[Byte]] && after.isInstanceOf[Array[Byte]]) { + util.Objects.deepEquals(before, after) + } else { + Objects.equal(before, after) + } + } + + def partitionSchemasMatch(before: PartitionSchema, after: PartitionSchema): Boolean = { + if (before eq after) return true + val beforeBuckets = before.getHashBucketSchemas.asScala + val afterBuckets = after.getHashBucketSchemas.asScala + if (beforeBuckets.size != afterBuckets.size) return false + val hashBucketsMatch = (0 until beforeBuckets.size).forall { i => + HashBucketSchemasMatch(beforeBuckets(i), afterBuckets(i)) + } + val beforeRangeHashSchemas = before.getRangesWithHashSchemas.asScala + val afterRangeHashSchemas = after.getRangesWithHashSchemas.asScala + if (beforeRangeHashSchemas.size != afterRangeHashSchemas.size) return false + for (i <- 0 until beforeRangeHashSchemas.size) { + val beforeHashSchemas = beforeRangeHashSchemas(i).hashSchemas.asScala + val afterHashSchemas = afterRangeHashSchemas(i).hashSchemas.asScala + if (beforeHashSchemas.size != afterHashSchemas.size) return false + for (j <- 0 until beforeHashSchemas.size) { + if (!HashBucketSchemasMatch(beforeHashSchemas(j), afterHashSchemas(j))) return false + } + if (!Objects.equal(beforeRangeHashSchemas(i).lowerBound, afterRangeHashSchemas(i).lowerBound) + || !Objects + .equal(beforeRangeHashSchemas(i).upperBound, afterRangeHashSchemas(i).upperBound)) + return false + } + hashBucketsMatch && + Objects.equal(before.getRangeSchema.getColumnIds, after.getRangeSchema.getColumnIds) + } + + def HashBucketSchemasMatch(before: HashBucketSchema, after: HashBucketSchema): Boolean = { + if (before eq after) return true + Objects.equal(before.getColumnIds, after.getColumnIds) && + Objects.equal(before.getNumBuckets, after.getNumBuckets) && + Objects.equal(before.getSeed, after.getSeed) + } + + /** + * Captures the logs while the wrapped function runs and returns them as a String. + */ + def captureLogs(f: () => Unit): String = { + val logs = new CapturingLogAppender() + val handle = logs.attach() + try { + f() + } finally { + handle.close() + } + logs.getAppendedText + } +} diff --git a/java-scala-spark4/kudu-backup/src/test/scala/org/apache/kudu/backup/TestOptions.scala b/java-scala-spark4/kudu-backup/src/test/scala/org/apache/kudu/backup/TestOptions.scala new file mode 100644 index 0000000000..b65bd38ac9 --- /dev/null +++ b/java-scala-spark4/kudu-backup/src/test/scala/org/apache/kudu/backup/TestOptions.scala @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
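+// Illustration for the defaultValuesMatch helper in TestKuduBackup above (a sketch, not part of
+// the test suite): Guava's Objects.equal falls back to equals(), which is reference equality for
+// arrays, while java.util.Objects.deepEquals compares array contents:
+//   com.google.common.base.Objects.equal(Array[Byte](1, 2), Array[Byte](1, 2))  // false
+//   java.util.Objects.deepEquals(Array[Byte](1, 2), Array[Byte](1, 2))          // true
+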
+package org.apache.kudu.backup + +import org.apache.kudu.spark.kudu.KuduTestSuite +import org.junit.Assert._ +import org.junit.Test + +class TestOptions extends KuduTestSuite { + + @Test + def testBackupOptionsHelp() { + val expectedStr = + """Usage: spark-submit --class org.apache.kudu.backup.KuduBackup [spark-options] [options]
... + | + | --rootPath The root path to output backup data. Accepts any Spark compatible path. + | --kuduMasterAddresses + | Comma-separated addresses of Kudu masters. Default: localhost + | --forceFull If true, this will be a full backup even if another full already exists. Default: false + | --fromMs A UNIX timestamp in milliseconds that defines the start time of an incremental backup. If unset, the fromMs will be defined by previous backups in the root directory. + | --timestampMs A UNIX timestamp in milliseconds since the epoch to execute scans at. Default: `System.currentTimeMillis()` + | --scanBatchSize The maximum number of bytes returned by the scanner, on each batch. Default: 20971520 + | --scanRequestTimeoutMs + | Sets how long in milliseconds each scan request to a server can last. Default: 30000 + | --keepAlivePeriodMs + | Sets the period at which to send keep-alive requests to the tablet server to ensure that scanners do not time out. Default: 15000 + | --failOnFirstError Whether to fail the backup job as soon as a single table backup fails. Default: false + | --help prints this usage text + |
... A list of tables to be backed up.""".stripMargin + assertEquals(expectedStr, BackupOptions.parser.usage) + } + + @Test + def testRestoreOptionsHelp() { + val expectedStr = + """Usage: spark-submit --class org.apache.kudu.backup.KuduRestore [spark-options] [options]
... + | + | --rootPath The root path to the backup data. Accepts any Spark compatible path. + | --kuduMasterAddresses + | Comma-separated addresses of Kudu masters. Default: localhost + | --createTables If true, create the tables during restore. Set to false if the target tables already exist. Default: true + | --removeImpalaPrefix + | If true, removes the "impala::" prefix, if present from the restored table names. This is advisable if backup was taken in a Kudu cluster without HMS sync and restoring to Kudu cluster which has HMS sync in place. Only used when createTables is true. Default: false + | --newDatabaseName + | If set, replaces the existing database name and if there is no existing database name, a new database name is added. Setting this to an empty string will have the same effect of not using the flag at all. For example, if this is set to newdb for the tables testtable and impala::db.testtable the restored tables will have the names newdb.testtable and impala::newdb.testtable respectively, assuming removeImpalaPrefix is set to false + | --tableSuffix If set, the suffix to add to the restored table names. Only used when createTables is true. + | --timestampMs A UNIX timestamp in milliseconds that defines the latest time to use when selecting restore candidates. Default: `System.currentTimeMillis()` + | --failOnFirstError Whether to fail the restore job as soon as a single table restore fails. Default: false + | --restoreOwner If true, it restores table ownership when creating new tables, otherwise creates tables as the logged in user. Only used when createTables is true. Default: true + | --help prints this usage text + |
... A list of tables to be restored.""".stripMargin + assertEquals(expectedStr, RestoreOptions.parser.usage) + } +} diff --git a/java-scala-spark4/kudu-client/build.gradle b/java-scala-spark4/kudu-client/build.gradle new file mode 100644 index 0000000000..082b7d9301 --- /dev/null +++ b/java-scala-spark4/kudu-client/build.gradle @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +apply from: "$rootDir/gradle/shadow.gradle" +apply from: "$rootDir/gradle/benchmarks.gradle" + +dependencies { + compile project(path: ":kudu-proto") + // Not shaded in the client JAR because it's part of the public API. + compileUnshaded(libs.async) { + // async uses versions ranges for slf4j making builds non-deterministic. + // Remove this once the following is merged: https://github.com/OpenTSDB/async/pull/8 + exclude group: "org.slf4j", module: "slf4j-api" + } + compileUnshaded libs.slf4jApi + + compile libs.guava + compile libs.micrometerCore + compile libs.murmur + compile libs.netty + + optional libs.jsr305 + optional libs.yetusAnnotations + + testCompile project(":kudu-test-utils") + testCompile libs.junit + testCompile libs.log4jApi + testCompile libs.log4jCore + testCompile libs.log4jSlf4jImpl + testCompile libs.mockitoCore +} diff --git a/java-scala-spark4/kudu-client/src/jmh/java/org/apache/kudu/client/RowResultIteratorBenchmark.java b/java-scala-spark4/kudu-client/src/jmh/java/org/apache/kudu/client/RowResultIteratorBenchmark.java new file mode 100644 index 0000000000..ceb682091f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/jmh/java/org/apache/kudu/client/RowResultIteratorBenchmark.java @@ -0,0 +1,201 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
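+// A note on the benchmark below, derived from its @Param annotations (not a claim about measured
+// results): reuseResultRow takes {true, false} and numRows takes {1, 10, 10000}, so JMH runs each
+// of the two @Benchmark methods (rowwise and columnar iteration) under 2 x 3 = 6 parameter
+// combinations.
+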
+ +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; + +import io.netty.util.CharsetUtil; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.infra.Blackhole; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.util.Slice; +import org.apache.kudu.util.Slices; + +@State(Scope.Thread) +@Fork(1) +public class RowResultIteratorBenchmark { + + final Schema schema; + + // for rowwise result + Slice rowwiseBs; + Slice rowwiseIndirectBs; + + // for columnar result + Slice[] columnarData; + Slice[] columnarVarlenData; + Slice[] columnarNonNullBitmaps; + + @Param({"true", "false"}) + boolean reuseResultRow; + + @Param({"1", "10", "10000"}) + int numRows; + + public RowResultIteratorBenchmark() { + List columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("action", Type.STRING).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("time", Type.INT32).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("seq", Type.INT64).key(true).build()); + this.schema = new Schema(columns); + } + + @Setup + public void prepare() { + prepareRowwiseSlices(); + prepareColumnarSlices(); + } + + private void prepareRowwiseSlices() { + byte[] data = new byte[numRows * schema.getRowSize()]; + byte[] vardata = new byte[numRows * 10]; + + int offset = 0; + int vardataOffset = 0; + + for (int i = 0; i < numRows; i++) { + String action = "action" + i; + int actionLen = action.getBytes(CharsetUtil.UTF_8).length; + + offset += writeLong(data, offset, vardataOffset); + offset += writeLong(data, offset, actionLen); + offset += writeInt(data, offset, i); + offset += writeLong(data, offset, i * 10000L); + + vardataOffset += writeString(vardata, vardataOffset, action); + } + + rowwiseBs = Slices.wrappedBuffer(data); + rowwiseIndirectBs = Slices.wrappedBuffer(vardata); + } + + private void prepareColumnarSlices() { + byte[][] data = new byte[3][]; + data[0] = new byte[4 * (numRows + 1)]; + data[1] = new byte[4 * numRows]; + data[2] = new byte[8 * numRows]; + + byte[][] varData = new byte[3][]; + varData[0] = new byte[numRows * 10]; + varData[1] = new byte[0]; + varData[2] = new byte[0]; + + byte[][] nonNullBitmaps = new byte[3][]; + nonNullBitmaps[0] = new byte[0]; + nonNullBitmaps[1] = new byte[0]; + nonNullBitmaps[2] = new byte[0]; + + int dataOffset0 = 0; + int dataOffset1 = 0; + int dataOffset2 = 0; + + int varDataOffset0 = 0; + + for (int i = 0; i < numRows; i++) { + String action = "action" + i; + + dataOffset0 += writeInt(data[0], dataOffset0, varDataOffset0); + varDataOffset0 += writeString(varData[0], varDataOffset0, action); + + dataOffset1 += writeInt(data[1], dataOffset1, i); + dataOffset2 += writeLong(data[2], dataOffset2, i * 10000L); + } + // write offset for last row. 
+ writeInt(data[0], dataOffset0, varDataOffset0); + + columnarData = new Slice[3]; + columnarVarlenData = new Slice[3]; + columnarNonNullBitmaps = new Slice[3]; + for (int i = 0; i < 3; i++) { + columnarData[i] = Slices.wrappedBuffer(data[i]); + columnarVarlenData[i] = Slices.wrappedBuffer(varData[i]); + columnarNonNullBitmaps[i] = Slices.wrappedBuffer(nonNullBitmaps[i]); + } + } + + @Benchmark + public void testRowwiseResult(Blackhole blackhole) { + RowResultIterator iter = new RowwiseRowResultIterator( + 0, "uuid", schema, numRows, + rowwiseBs, rowwiseIndirectBs, reuseResultRow); + + while (iter.hasNext()) { + RowResult row = iter.next(); + String action = row.getString(0); + int time = row.getInt(1); + long seq = row.getLong(2); + + blackhole.consume(action); + blackhole.consume(time); + blackhole.consume(seq); + } + } + + @Benchmark + public void testColumnarResult(Blackhole blackhole) { + RowResultIterator iter = new ColumnarRowResultIterator( + 0, "uuid", schema, numRows, + columnarData, columnarVarlenData, columnarNonNullBitmaps, reuseResultRow); + + while (iter.hasNext()) { + RowResult row = iter.next(); + String action = row.getString(0); + int time = row.getInt(1); + long seq = row.getLong(2); + + blackhole.consume(action); + blackhole.consume(time); + blackhole.consume(seq); + } + } + + private static int writeInt(final byte[] b, final int offset, final int value) { + b[offset + 0] = (byte) (value >> 0); + b[offset + 1] = (byte) (value >> 8); + b[offset + 2] = (byte) (value >> 16); + b[offset + 3] = (byte) (value >> 24); + return 4; + } + + private static int writeLong(final byte[] b, final int offset, final long value) { + b[offset + 0] = (byte) (value >> 0); + b[offset + 1] = (byte) (value >> 8); + b[offset + 2] = (byte) (value >> 16); + b[offset + 3] = (byte) (value >> 24); + b[offset + 4] = (byte) (value >> 32); + b[offset + 5] = (byte) (value >> 40); + b[offset + 6] = (byte) (value >> 48); + b[offset + 7] = (byte) (value >> 56); + return 8; + } + + private static int writeString(final byte[] b, final int offset, final String value) { + byte[] data = value.getBytes(CharsetUtil.UTF_8); + System.arraycopy(data, 0, b, offset, data.length); + return data.length; + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/ColumnSchema.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/ColumnSchema.java new file mode 100644 index 0000000000..26b679deb5 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/ColumnSchema.java @@ -0,0 +1,597 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
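+// Worked example for the writeInt/writeLong helpers in RowResultIteratorBenchmark above (a sketch
+// using the hypothetical value 0x0A0B0C0D): bytes are packed least-significant byte first, i.e.
+// little-endian, so writeInt(b, 0, 0x0A0B0C0D) leaves b = {0x0D, 0x0C, 0x0B, 0x0A} and returns 4,
+// the number of bytes written.
+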
+ +package org.apache.kudu; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Common.EncodingType; +import org.apache.kudu.Compression.CompressionType; +import org.apache.kudu.util.CharUtil; + +/** + * Represents a Kudu Table column. Use {@link ColumnSchema.ColumnSchemaBuilder} in order to + * create columns. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ColumnSchema { + + private final String name; + private final Type type; + private final boolean key; + private final boolean keyUnique; + private final boolean nullable; + private final boolean immutable; + private final boolean autoIncrementing; + private final Object defaultValue; + private final int desiredBlockSize; + private final Encoding encoding; + private final CompressionAlgorithm compressionAlgorithm; + private final ColumnTypeAttributes typeAttributes; + private final int typeSize; + private final Common.DataType wireType; + private final String comment; + + /** + * Specifies the encoding of data for a column on disk. + * Not all encodings are available for all data types. + * Refer to the Kudu documentation for more information on each encoding. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public enum Encoding { + UNKNOWN(EncodingType.UNKNOWN_ENCODING), + AUTO_ENCODING(EncodingType.AUTO_ENCODING), + PLAIN_ENCODING(EncodingType.PLAIN_ENCODING), + PREFIX_ENCODING(EncodingType.PREFIX_ENCODING), + GROUP_VARINT(EncodingType.GROUP_VARINT), + RLE(EncodingType.RLE), + DICT_ENCODING(EncodingType.DICT_ENCODING), + BIT_SHUFFLE(EncodingType.BIT_SHUFFLE); + + final EncodingType internalPbType; + + Encoding(EncodingType internalPbType) { + this.internalPbType = internalPbType; + } + + @InterfaceAudience.Private + public EncodingType getInternalPbType() { + return internalPbType; + } + } + + /** + * Specifies the compression algorithm of data for a column on disk. 
+ */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public enum CompressionAlgorithm { + UNKNOWN(CompressionType.UNKNOWN_COMPRESSION), + DEFAULT_COMPRESSION(CompressionType.DEFAULT_COMPRESSION), + NO_COMPRESSION(CompressionType.NO_COMPRESSION), + SNAPPY(CompressionType.SNAPPY), + LZ4(CompressionType.LZ4), + ZLIB(CompressionType.ZLIB); + + final CompressionType internalPbType; + + CompressionAlgorithm(CompressionType internalPbType) { + this.internalPbType = internalPbType; + } + + @InterfaceAudience.Private + public CompressionType getInternalPbType() { + return internalPbType; + } + } + + private ColumnSchema(String name, Type type, boolean key, boolean keyUnique, + boolean nullable, boolean immutable, boolean autoIncrementing, + Object defaultValue, int desiredBlockSize, Encoding encoding, + CompressionAlgorithm compressionAlgorithm, + ColumnTypeAttributes typeAttributes, Common.DataType wireType, + String comment) { + this.name = name; + this.type = type; + this.key = key; + this.keyUnique = keyUnique; + this.nullable = nullable; + this.immutable = immutable; + this.autoIncrementing = autoIncrementing; + this.defaultValue = defaultValue; + this.desiredBlockSize = desiredBlockSize; + this.encoding = encoding; + this.compressionAlgorithm = compressionAlgorithm; + this.typeAttributes = typeAttributes; + this.typeSize = type.getSize(typeAttributes); + this.wireType = wireType; + this.comment = comment; + } + + /** + * Get the column's Type + * @return the type + */ + public Type getType() { + return type; + } + + /** + * Get the column's name + * @return A string representation of the name + */ + public String getName() { + return name; + } + + /** + * Answers if the column part of the key + * @return true if the column is part of the key, else false + */ + public boolean isKey() { + return key; + } + + /** + * Answers if the key is unique + * @return true if the key is unique + */ + public boolean isKeyUnique() { + return keyUnique; + } + + /** + * Answers if the column can be set to null + * @return true if it can be set to null, else false + */ + public boolean isNullable() { + return nullable; + } + + /** + * Answers if the column is immutable + * @return true if it is immutable, else false + */ + public boolean isImmutable() { + return immutable; + } + + /** + * Answers if the column is auto-incrementing column + * @return true if the column value is automatically assigned with incrementing value + */ + public boolean isAutoIncrementing() { + return autoIncrementing; + } + + /** + * The Java object representation of the default value that's read + * @return the default read value + */ + public Object getDefaultValue() { + return defaultValue; + } + + /** + * Gets the desired block size for this column. + * If no block size has been explicitly specified for this column, + * returns 0 to indicate that the server-side default will be used. + * + * @return the block size, in bytes, or 0 if none has been configured. + */ + public int getDesiredBlockSize() { + return desiredBlockSize; + } + + /** + * Return the encoding of this column, or null if it is not known. + */ + public Encoding getEncoding() { + return encoding; + } + + /** + * Return the compression algorithm of this column, or null if it is not known. + */ + public CompressionAlgorithm getCompressionAlgorithm() { + return compressionAlgorithm; + } + + /** + * Return the column type attributes for the column, or null if it is not known. 
+ */ + public ColumnTypeAttributes getTypeAttributes() { + return typeAttributes; + } + + /** + * Get the column's underlying DataType. + */ + @InterfaceAudience.Private + public Common.DataType getWireType() { + return wireType; + } + + /** + * The size of this type in bytes on the wire. + * @return A size + */ + public int getTypeSize() { + return typeSize; + } + + /** + * Return the comment for the column. An empty string means there is no comment. + */ + public String getComment() { + return comment; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof ColumnSchema)) { + return false; + } + ColumnSchema that = (ColumnSchema) o; + return Objects.equals(name, that.name) && + Objects.equals(type, that.type) && + Objects.equals(key, that.key) && + Objects.equals(keyUnique, that.keyUnique) && + Objects.equals(autoIncrementing, that.autoIncrementing) && + Objects.equals(typeAttributes, that.typeAttributes) && + Objects.equals(comment, that.comment); + } + + @Override + public int hashCode() { + return Objects.hash(name, type, key, typeAttributes, comment); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Column name: "); + sb.append(name); + sb.append(", type: "); + sb.append(type.getName()); + if (typeAttributes != null) { + sb.append(typeAttributes.toStringForType(type)); + } + if (!comment.isEmpty()) { + sb.append(", comment: "); + sb.append(comment); + } + return sb.toString(); + } + + /** + * Builder for ColumnSchema. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class ColumnSchemaBuilder { + private static final List TYPES_WITH_ATTRIBUTES = Arrays.asList(Type.DECIMAL, + Type.VARCHAR); + private final String name; + private final Type type; + private boolean key = false; + private boolean keyUnique = false; + private boolean nullable = false; + private boolean immutable = false; + private Object defaultValue = null; + private int desiredBlockSize = 0; + private Encoding encoding = null; + private CompressionAlgorithm compressionAlgorithm = null; + private ColumnTypeAttributes typeAttributes = null; + private Common.DataType wireType = null; + private String comment = ""; + + /** + * Constructor for the required parameters. + * @param name column's name + * @param type column's type + * @throws IllegalArgumentException if the column's name equals the reserved + * auto-incrementing column name + */ + public ColumnSchemaBuilder(String name, Type type) { + if (name.equalsIgnoreCase(Schema.getAutoIncrementingColumnName())) { + throw new IllegalArgumentException("Column name " + + Schema.getAutoIncrementingColumnName() + " is reserved by Kudu engine"); + } + this.name = name; + this.type = type; + } + + /** + * Constructor to copy an existing columnSchema + * @param that the columnSchema to copy + */ + public ColumnSchemaBuilder(ColumnSchema that) { + this.name = that.name; + this.type = that.type; + this.key = that.key; + this.keyUnique = that.keyUnique; + this.nullable = that.nullable; + this.immutable = that.immutable; + this.defaultValue = that.defaultValue; + this.desiredBlockSize = that.desiredBlockSize; + this.encoding = that.encoding; + this.compressionAlgorithm = that.compressionAlgorithm; + this.typeAttributes = that.typeAttributes; + this.wireType = that.wireType; + this.comment = that.comment; + } + + /** + * Sets if the column is part of the row key. False by default. 
+ * This function call overrides any previous key() and nonUniqueKey() call. + * @param key a boolean that indicates if the column is part of the key + * @return this instance + */ + public ColumnSchemaBuilder key(boolean key) { + this.key = key; + this.keyUnique = key ? true : false; + return this; + } + + /** + * Sets if the column is part of the row non unique key. False by default. + * This function call overrides any previous key() and nonUniqueKey() call. + * @param key a boolean that indicates if the column is a part of the non unique key + * @return this instance + */ + public ColumnSchemaBuilder nonUniqueKey(boolean key) { + this.key = key; + this.keyUnique = false; + return this; + } + + /** + * Marks the column as allowing null values. False by default. + *
+ * NOTE: the "not-nullable-by-default" behavior here differs from + * the equivalent API in the Python and C++ clients. It also differs from the + * standard behavior of SQL CREATE TABLE statements. It is + * recommended to always specify nullability explicitly using this API + * in order to avoid confusion. + * + * @param nullable a boolean that indicates if the column allows null values + * @return this instance + */ + public ColumnSchemaBuilder nullable(boolean nullable) { + this.nullable = nullable; + return this; + } + + /** + * Marks the column as immutable or not. False by default. + * + * @param immutable a boolean that indicates if the column is immutable + * @return this instance + */ + public ColumnSchemaBuilder immutable(boolean immutable) { + this.immutable = immutable; + return this; + } + + /** + * Sets the default value that will be read from the column. Null by default. + * @param defaultValue a Java object representation of the default value that's read + * @return this instance + */ + public ColumnSchemaBuilder defaultValue(Object defaultValue) { + this.defaultValue = defaultValue; + return this; + } + + /** + * Set the desired block size for this column. + * + * This is the number of bytes of user data packed per block on disk, and + * represents the unit of IO when reading this column. Larger values + * may improve scan performance, particularly on spinning media. Smaller + * values may improve random access performance, particularly for workloads + * that have high cache hit rates or operate on fast storage such as SSD. + * + * Note that the block size specified here corresponds to uncompressed data. + * The actual size of the unit read from disk may be smaller if + * compression is enabled. + * + * It's recommended that this not be set any lower than 4096 (4KB) or higher + * than 1048576 (1MB). + * @param desiredBlockSize the desired block size, in bytes + * @return this instance + * + */ + public ColumnSchemaBuilder desiredBlockSize(int desiredBlockSize) { + this.desiredBlockSize = desiredBlockSize; + return this; + } + + /** + * Set the block encoding for this column. See the documentation for the list + * of valid options. + */ + public ColumnSchemaBuilder encoding(Encoding encoding) { + this.encoding = encoding; + return this; + } + + /** + * Set the compression algorithm for this column. See the documentation for the list + * of valid options. + */ + public ColumnSchemaBuilder compressionAlgorithm(CompressionAlgorithm compressionAlgorithm) { + this.compressionAlgorithm = compressionAlgorithm; + return this; + } + + /** + * Set the column type attributes for this column. + */ + public ColumnSchemaBuilder typeAttributes(ColumnTypeAttributes typeAttributes) { + if (typeAttributes != null && !TYPES_WITH_ATTRIBUTES.contains(type)) { + throw new IllegalArgumentException( + "ColumnTypeAttributes are not used on " + type + " columns"); + } + this.typeAttributes = typeAttributes; + return this; + } + + /** + * Allows an alternate {@link Common.DataType} to override the {@link Type} + * when serializing the ColumnSchema on the wire. + * This is useful for virtual columns specified by their type such as + * {@link Common.DataType#IS_DELETED}. + */ + @InterfaceAudience.Private + public ColumnSchemaBuilder wireType(Common.DataType wireType) { + this.wireType = wireType; + return this; + } + + /** + * Set the comment for this column. 
+ */ + public ColumnSchemaBuilder comment(String comment) { + this.comment = comment; + return this; + } + + /** + * Builds a {@link ColumnSchema} using the passed parameters. + * @return a new {@link ColumnSchema} + */ + public ColumnSchema build() { + // Set the wire type if it wasn't explicitly set. + if (wireType == null) { + this.wireType = type.getDataType(typeAttributes); + } + if (type == Type.VARCHAR) { + if (typeAttributes == null || !typeAttributes.hasLength() || + typeAttributes.getLength() < CharUtil.MIN_VARCHAR_LENGTH || + typeAttributes.getLength() > CharUtil.MAX_VARCHAR_LENGTH) { + throw new IllegalArgumentException( + String.format("VARCHAR's length must be set and between %d and %d", + CharUtil.MIN_VARCHAR_LENGTH, CharUtil.MAX_VARCHAR_LENGTH)); + } + } + + return new ColumnSchema(name, type, key, keyUnique, nullable, immutable, + /* autoIncrementing */false, defaultValue, + desiredBlockSize, encoding, compressionAlgorithm, + typeAttributes, wireType, comment); + } + } + + /** + * Builder for ColumnSchema of the auto-incrementing column. It's used internally in Kudu + * client library. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class AutoIncrementingColumnSchemaBuilder { + private final String name; + private final Type type; + private int desiredBlockSize = 0; + private Encoding encoding = null; + private CompressionAlgorithm compressionAlgorithm = null; + private Common.DataType wireType = null; + private String comment = ""; + + /** + * Constructor with default parameter values for {@link ColumnSchema}. + */ + public AutoIncrementingColumnSchemaBuilder() { + this.name = Schema.getAutoIncrementingColumnName(); + this.type = Schema.getAutoIncrementingColumnType(); + } + + /** + * Set the desired block size for this column. + */ + public AutoIncrementingColumnSchemaBuilder desiredBlockSize(int desiredBlockSize) { + this.desiredBlockSize = desiredBlockSize; + return this; + } + + /** + * Set the block encoding for this column. This function should be called when + * fetching column schema from Kudu server. + */ + public AutoIncrementingColumnSchemaBuilder encoding(Encoding encoding) { + this.encoding = encoding; + return this; + } + + /** + * Set the compression algorithm for this column. This function should be called + * when fetching column schema from Kudu server. + */ + public AutoIncrementingColumnSchemaBuilder compressionAlgorithm( + CompressionAlgorithm compressionAlgorithm) { + this.compressionAlgorithm = compressionAlgorithm; + return this; + } + + /** + * Allows an alternate {@link Common.DataType} to override the {@link Type} + * when serializing the ColumnSchema on the wire. + * This is useful for virtual columns specified by their type such as + * {@link Common.DataType#IS_DELETED}. + */ + @InterfaceAudience.Private + public AutoIncrementingColumnSchemaBuilder wireType(Common.DataType wireType) { + this.wireType = wireType; + return this; + } + + /** + * Set the comment for this column. + */ + public AutoIncrementingColumnSchemaBuilder comment(String comment) { + this.comment = comment; + return this; + } + + /** + * Builds a {@link ColumnSchema} for auto-incrementing column with passed parameters. + * @return a new {@link ColumnSchema} + */ + public ColumnSchema build() { + // Set the wire type if it wasn't explicitly set. 
+ if (wireType == null) { + this.wireType = type.getDataType(null); + } + return new ColumnSchema(name, type, /* key */true, /* keyUnique */false, + /* nullable */false, /* immutable */false, + /* autoIncrementing */true, /* defaultValue */null, + desiredBlockSize, encoding, compressionAlgorithm, + /* typeAttributes */null, wireType, comment); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/ColumnTypeAttributes.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/ColumnTypeAttributes.java new file mode 100644 index 0000000000..b97c519a36 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/ColumnTypeAttributes.java @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu; + +import java.util.Objects; + +/** + * Represents a Kudu Table column's type attributes. + */ +@org.apache.yetus.audience.InterfaceAudience.Public +@org.apache.yetus.audience.InterfaceStability.Evolving +public class ColumnTypeAttributes { + + private final boolean hasPrecision; + private final int precision; + + private final boolean hasScale; + private final int scale; + + private final boolean hasLength; + private final int length; + + private ColumnTypeAttributes(boolean hasPrecision, int precision, + boolean hasScale, int scale, + boolean hasLength, int length) { + this.hasPrecision = hasPrecision; + this.precision = precision; + this.hasScale = hasScale; + this.scale = scale; + this.hasLength = hasLength; + this.length = length; + } + + /** + * Returns true if the precision is set; + */ + public boolean hasPrecision() { + return hasPrecision; + } + + /** + * Return the precision; + */ + public int getPrecision() { + return precision; + } + + /** + * Returns true if the scale is set; + */ + public boolean hasScale() { + return hasScale; + } + + /** + * Return the scale; + */ + public int getScale() { + return scale; + } + + /** + * Returns true if the length is set; + */ + public boolean hasLength() { + return hasLength; + } + + /** + * Returns the length; + */ + public int getLength() { + return length; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof ColumnTypeAttributes)) { + return false; + } + + ColumnTypeAttributes that = (ColumnTypeAttributes) o; + + if (hasPrecision != that.hasPrecision) { + return false; + } + if (precision != that.precision) { + return false; + } + if (hasScale != that.hasScale) { + return false; + } + if (scale != that.scale) { + return false; + } + if (hasLength != that.hasLength) { + return false; + } + if (length != that.length) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + return Objects.hash(hasPrecision, precision, 
hasScale, scale, hasLength, length); + } + + /** + * Return a string representation appropriate for `type`. + * This is meant to be postfixed to the name of a primitive type to describe + * the full type, e.g. decimal(10, 4). + * @param type the type. + * @return a postfix string. + */ + public String toStringForType(Type type) { + if (type == Type.DECIMAL) { + return "(" + precision + ", " + scale + ")"; + } else if (type == Type.VARCHAR) { + return "(" + length + ")"; + } else { + return ""; + } + } + + @Override + public String toString() { + return "hasPrecision: " + hasPrecision + ", precision: " + precision + + ", hasScale: " + hasScale + ", scale: " + scale + + ", hasLength: " + hasLength + ", length: " + length; + } + + /** + * Builder for ColumnTypeAttributes. + */ + @org.apache.yetus.audience.InterfaceAudience.Public + @org.apache.yetus.audience.InterfaceStability.Evolving + public static class ColumnTypeAttributesBuilder { + + private boolean hasPrecision; + private int precision; + private boolean hasScale; + private int scale; + private boolean hasLength; + private int length; + + /** + * Set the precision. Only used for Decimal columns. + */ + public ColumnTypeAttributesBuilder precision(int precision) { + this.hasPrecision = true; + this.precision = precision; + return this; + } + + /** + * Set the scale. Only used for Decimal columns. + */ + public ColumnTypeAttributesBuilder scale(int scale) { + this.hasScale = true; + this.scale = scale; + return this; + } + + public ColumnTypeAttributesBuilder length(int length) { + this.hasLength = true; + this.length = length; + return this; + } + + /** + * Builds a {@link ColumnTypeAttributes} using the passed parameters. + * @return a new {@link ColumnTypeAttributes} + */ + public ColumnTypeAttributes build() { + return new ColumnTypeAttributes(hasPrecision, precision, hasScale, scale, hasLength, length); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/Schema.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/Schema.java new file mode 100644 index 0000000000..335bd512b2 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/Schema.java @@ -0,0 +1,491 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Common.DataType; +import org.apache.kudu.client.Bytes; +import org.apache.kudu.client.PartialRow; + +/** + * Represents table's schema which is essentially a list of columns. + * This class offers a few utility methods for querying it. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Schema { + + /* + * Column name and type of auto_incrementing_id column, which is added by Kudu engine + * automatically if the primary key is not unique. + */ + private static final String AUTO_INCREMENTING_ID_COL_NAME = "auto_incrementing_id"; + private static final Type AUTO_INCREMENTING_ID_COL_TYPE = Type.INT64; + + /** + * Mapping of column index to column. + */ + private final List columnsByIndex; + + /** + * The primary key columns. + */ + private final List primaryKeyColumns = new ArrayList<>(); + + /** + * Mapping of column name to index. + */ + private final Map columnsByName; + + /** + * Mapping of column ID to index, or null if the schema does not have assigned column IDs. + */ + private final Map columnsById; + + /** + * Mapping of column name to column ID, or null if the schema does not have assigned column IDs. + */ + private final Map columnIdByName; + + /** + * Mapping of column index to backing byte array offset. + */ + private final int[] columnOffsets; + + private final int varLengthColumnCount; + private final int rowSize; + private final boolean isKeyUnique; + private final boolean hasNullableColumns; + private final boolean hasImmutableColumns; + private final boolean hasAutoIncrementingColumn; + + private final int isDeletedIndex; + private static final int NO_IS_DELETED_INDEX = -1; + + /** + * Constructs a schema using the specified columns and does some internal accounting + * + * @param columns the columns in index order + * + * See {@code ColumnPBsToSchema()} in {@code src/kudu/common/wire_protocol.cc} + */ + public Schema(List columns) { + this(columns, null); + } + + /** + * Constructs a schema using the specified columns and IDs. + * + * This is not a stable API, prefer using {@link Schema#Schema(List)} to create a new schema. 
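+   * <p>
+   * For illustration, the preferred single-argument constructor might be used as follows
+   * (column names and type attributes are hypothetical, and {@code key(true)} is assumed
+   * from the {@code ColumnSchema.ColumnSchemaBuilder} API):
+   * <pre>{@code
+   * ColumnSchema id = new ColumnSchema.ColumnSchemaBuilder("id", Type.INT64)
+   *     .key(true)
+   *     .build();
+   * ColumnSchema amount = new ColumnSchema.ColumnSchemaBuilder("amount", Type.DECIMAL)
+   *     .typeAttributes(new ColumnTypeAttributes.ColumnTypeAttributesBuilder()
+   *         .precision(18).scale(2).build())
+   *     .build();
+   * Schema schema = new Schema(ImmutableList.of(id, amount));
+   * }</pre>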
+ * + * @param columns the columns in index order + * @param columnIds the column ids of the provided columns, or null + * @throws IllegalArgumentException If the column ids length does not match the columns length + * + * See {@code ColumnPBsToSchema()} in {@code src/kudu/common/wire_protocol.cc} + */ + public Schema(List columns, List columnIds) { + boolean hasColumnIds = columnIds != null; + if (hasColumnIds && columns.size() != columnIds.size()) { + throw new IllegalArgumentException( + "Schema must be constructed with all column IDs, or none."); + } + + boolean isKeyFound = false; + boolean isKeyUnique = false; + boolean hasAutoIncrementing = false; + int keyColumnCount = 0; + int maxColumnId = Integer.MIN_VALUE; + // Check if auto-incrementing column should be added into the input columns list + for (int index = 0; index < columns.size(); index++) { + final ColumnSchema column = columns.get(index); + if (column.isKey()) { + keyColumnCount++; + if (!isKeyFound) { + isKeyFound = true; + isKeyUnique = column.isKeyUnique(); + } else if (isKeyUnique != column.isKeyUnique()) { + throw new IllegalArgumentException( + "Mixture of unique key and non unique key in a table"); + } + } + if (column.isAutoIncrementing()) { + if (!hasAutoIncrementing) { + hasAutoIncrementing = true; + } else { + throw new IllegalArgumentException( + "More than one columns are set as auto-incrementing columns"); + } + } + if (hasColumnIds && maxColumnId < columnIds.get(index).intValue()) { + maxColumnId = columnIds.get(index).intValue(); + } + } + // Add auto-incrementing column into input columns list if the primary key is not + // unique and auto-incrementing column has not been created. + if (keyColumnCount > 0 && !isKeyUnique && !hasAutoIncrementing) { + // Build auto-incrementing column + ColumnSchema autoIncrementingColumn = + new ColumnSchema.AutoIncrementingColumnSchemaBuilder().build(); + // Make a copy of mutable list of columns, then add an auto-incrementing + // column after the columns marked as key columns. + columns = new ArrayList<>(columns); + Preconditions.checkNotNull(columns); + columns.add(keyColumnCount, autoIncrementingColumn); + if (hasColumnIds) { + columnIds = new ArrayList<>(columnIds); + columnIds.add(keyColumnCount, maxColumnId + 1); + } + hasAutoIncrementing = true; + } + + this.columnsByIndex = ImmutableList.copyOf(columns); + int varLenCnt = 0; + this.columnOffsets = new int[columns.size()]; + this.columnsByName = new HashMap<>(columns.size()); + this.columnsById = hasColumnIds ? new HashMap<>(columnIds.size()) : null; + this.columnIdByName = hasColumnIds ? 
new HashMap<>(columnIds.size()) : null; + int offset = 0; + boolean hasNulls = false; + boolean hasImmutables = false; + int isDeletedIndex = NO_IS_DELETED_INDEX; + // pre-compute a few counts and offsets + for (int index = 0; index < columns.size(); index++) { + final ColumnSchema column = columns.get(index); + if (column.isKey()) { + primaryKeyColumns.add(column); + } + + hasNulls |= column.isNullable(); + hasImmutables |= column.isImmutable(); + columnOffsets[index] = offset; + offset += column.getTypeSize(); + if (this.columnsByName.put(column.getName(), index) != null) { + throw new IllegalArgumentException( + String.format("Column names must be unique: %s", columns)); + } + if (column.getType() == Type.STRING || column.getType() == Type.BINARY) { + varLenCnt++; + } + + if (hasColumnIds) { + if (this.columnsById.put(columnIds.get(index), index) != null) { + throw new IllegalArgumentException( + String.format("Column IDs must be unique: %s", columnIds)); + } + if (this.columnIdByName.put(column.getName(), columnIds.get(index)) != null) { + throw new IllegalArgumentException( + String.format("Column names must be unique: %s", columnIds)); + } + } + + // If this is the IS_DELETED virtual column, set `hasIsDeleted` and `isDeletedIndex`. + if (column.getWireType() == DataType.IS_DELETED) { + isDeletedIndex = index; + } + } + + this.varLengthColumnCount = varLenCnt; + this.rowSize = getRowSize(this.columnsByIndex); + this.isKeyUnique = isKeyUnique; + this.hasNullableColumns = hasNulls; + this.hasImmutableColumns = hasImmutables; + this.hasAutoIncrementingColumn = hasAutoIncrementing; + this.isDeletedIndex = isDeletedIndex; + } + + /** + * Get the list of columns used to create this schema + * @return list of columns + */ + public List getColumns() { + return this.columnsByIndex; + } + + /** + * Get the count of columns with variable length (BINARY/STRING) in + * this schema. + * @return strings count + */ + public int getVarLengthColumnCount() { + return this.varLengthColumnCount; + } + + /** + * Get the size a row built using this schema would be + * @return size in bytes + */ + public int getRowSize() { + return this.rowSize; + } + + /** + * Gives the size in bytes for a single row given the specified schema + * @param columns the row's columns + * @return row size in bytes + */ + private static int getRowSize(List columns) { + int totalSize = 0; + boolean hasNullables = false; + for (ColumnSchema column : columns) { + totalSize += column.getTypeSize(); + hasNullables |= column.isNullable(); + } + if (hasNullables) { + totalSize += Bytes.getBitSetSize(columns.size()); + } + return totalSize; + } + + /** + * Get the index at which this column can be found in the backing byte array + * @param idx column's index + * @return column's offset + */ + public int getColumnOffset(int idx) { + return this.columnOffsets[idx]; + } + + /** + * Returns true if the column exists. + * @param columnName column to search for + * @return true if the column exists + */ + public boolean hasColumn(String columnName) { + return this.columnsByName.containsKey(columnName); + } + + /** + * Get the index for the provided column name. 
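+   * <p>
+   * For example, assuming a {@code Schema} instance named {@code schema} that contains a
+   * column called {@code "id"}:
+   * <pre>{@code
+   * int idx = schema.getColumnIndex("id");
+   * ColumnSchema idCol = schema.getColumnByIndex(idx);
+   * }</pre>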
+ * @param columnName column to search for + * @return an index in the schema + */ + public int getColumnIndex(String columnName) { + Integer index = this.columnsByName.get(columnName); + if (index == null) { + throw new IllegalArgumentException( + String.format("Unknown column: %s", columnName)); + } + return index; + } + + /** + * Get the column index of the column with the provided ID. + * This method is not part of the stable API. + * @param columnId the column id of the column + * @return the column index of the column. + */ + public int getColumnIndex(int columnId) { + if (!hasColumnIds()) { + throw new IllegalStateException("Schema does not have Column IDs"); + } + Integer index = this.columnsById.get(columnId); + if (index == null) { + throw new IllegalArgumentException( + String.format("Unknown column id: %s", columnId)); + } + return index; + } + + /** + * Get the column at the specified index in the original list + * @param idx column's index + * @return the column + */ + public ColumnSchema getColumnByIndex(int idx) { + return this.columnsByIndex.get(idx); + } + + /** + * Get the column associated with the specified name + * @param columnName column's name + * @return the column + */ + public ColumnSchema getColumn(String columnName) { + return columnsByIndex.get(getColumnIndex(columnName)); + } + + /** + * Get the count of columns in this schema + * @return count of columns + */ + public int getColumnCount() { + return this.columnsByIndex.size(); + } + + /** + * Get the count of columns that are part of the primary key. + * @return count of primary key columns. + */ + public int getPrimaryKeyColumnCount() { + return this.primaryKeyColumns.size(); + } + + /** + * Get the primary key columns. + * @return the primary key columns. + */ + public List getPrimaryKeyColumns() { + return primaryKeyColumns; + } + + /** + * Answers if the primary key is unique for the table + * @return true if the key is unique + */ + public boolean isPrimaryKeyUnique() { + return this.isKeyUnique; + } + + /** + * Tells if there's auto-incrementing column + * @return true if there's auto-incrementing column, else false. + */ + public boolean hasAutoIncrementingColumn() { + return this.hasAutoIncrementingColumn; + } + + /** + * Get the name of the auto-incrementing column + * @return column name of the auto-incrementing column. + */ + public static String getAutoIncrementingColumnName() { + return AUTO_INCREMENTING_ID_COL_NAME; + } + + /** + * Get the type of the auto-incrementing column + * @return type of the auto-incrementing column. + */ + public static Type getAutoIncrementingColumnType() { + return AUTO_INCREMENTING_ID_COL_TYPE; + } + + /** + * Get a schema that only contains the columns which are part of the key + * @return new schema with only the keys + */ + public Schema getRowKeyProjection() { + return new Schema(primaryKeyColumns); + } + + /** + * Tells if there's at least one nullable column + * @return true if at least one column is nullable, else false. + */ + public boolean hasNullableColumns() { + return this.hasNullableColumns; + } + + /** + * Tells if there's at least one immutable column + * @return true if at least one column is immutable, else false. + */ + public boolean hasImmutableColumns() { + return this.hasImmutableColumns; + } + + /** + * Tells whether this schema includes IDs for columns. A schema created by a client as part of + * table creation will not include IDs, but schemas for open tables will include IDs. + * This method is not part of the stable API. 
+ * + * @return whether this schema includes column IDs. + */ + public boolean hasColumnIds() { + return columnsById != null; + } + + /** + * Get the internal column ID for a column name. + * @param columnName column's name + * @return the column ID + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public int getColumnId(String columnName) { + return columnIdByName.get(columnName); + } + + /** + * Creates a new partial row for the schema. + * @return a new partial row + */ + public PartialRow newPartialRow() { + return new PartialRow(this); + } + + /** + * @return true if the schema has the IS_DELETED virtual column + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public boolean hasIsDeleted() { + return isDeletedIndex != NO_IS_DELETED_INDEX; + } + + /** + * @return the index of the IS_DELETED virtual column + * @throws IllegalStateException if no IS_DELETED virtual column exists + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public int getIsDeletedIndex() { + Preconditions.checkState(hasIsDeleted(), "Schema doesn't have an IS_DELETED columns"); + return isDeletedIndex; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof Schema)) { + return false; + } + Schema that = (Schema) obj; + if (this.getPrimaryKeyColumnCount() != that.getPrimaryKeyColumnCount()) { + return false; + } + if (this.getColumns().size() != that.getColumns().size()) { + return false; + } + for (int i = 0; i < this.getColumns().size(); i++) { + if (!this.getColumnByIndex(i).equals(that.getColumnByIndex(i))) { + return false; + } + } + return true; + } + + @Override + public int hashCode() { + return Objects.hash(varLengthColumnCount, rowSize, isKeyUnique, hasNullableColumns, + hasImmutableColumns, hasAutoIncrementingColumn); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/Type.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/Type.java new file mode 100644 index 0000000000..9d550b66c4 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/Type.java @@ -0,0 +1,236 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu; + +import static org.apache.kudu.Common.DataType; + +import java.util.Arrays; +import java.util.List; + +import com.google.common.collect.ImmutableList; +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import com.google.common.primitives.Shorts; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.util.DecimalUtil; + +/** + * Describes all the types available to build table schemas. 
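+ * <p>
+ * A small usage sketch (illustrative only):
+ * <pre>{@code
+ * Type t = Type.getTypeForName("int32");   // equivalent to Type.INT32
+ * DataType wire = t.getDataType(null);     // non-DECIMAL types ignore the attributes
+ * boolean fixed = t.isFixedSize();         // true for INT32
+ * }</pre>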
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum Type { + + INT8(DataType.INT8, "int8"), + INT16(DataType.INT16, "int16"), + INT32(DataType.INT32, "int32"), + INT64(DataType.INT64, "int64"), + BINARY(DataType.BINARY, "binary"), + STRING(DataType.STRING, "string"), + BOOL(DataType.BOOL, "bool"), + FLOAT(DataType.FLOAT, "float"), + DOUBLE(DataType.DOUBLE, "double"), + UNIXTIME_MICROS(DataType.UNIXTIME_MICROS, "unixtime_micros"), + DECIMAL(Arrays.asList(DataType.DECIMAL32, DataType.DECIMAL64, DataType.DECIMAL128), "decimal"), + VARCHAR(DataType.VARCHAR, "varchar"), + DATE(DataType.DATE, "date"); + + private final ImmutableList dataTypes; + private final String name; + private final int size; + + /** + * Private constructor used to pre-create the types + * @param dataType DataType from the common's pb + * @param name string representation of the type + */ + Type(DataType dataType, String name) { + this.dataTypes = ImmutableList.of(dataType); + this.name = name; + this.size = getTypeSize(dataType); + } + + Type(List dataTypes, String name) { + this.dataTypes = ImmutableList.copyOf(dataTypes); + this.name = name; + this.size = -1; + } + + /** + * Get the data type from the common's pb + * @return A DataType + * @deprecated use {@link #getDataType(ColumnTypeAttributes)} + */ + @Deprecated + public DataType getDataType() { + if (this == DECIMAL) { + throw new IllegalStateException("Please use the newer getDataType(ColumnTypeAttributes) " + + "to support the Decimal data type"); + } + return this.dataTypes.get(0); + } + + /** + * Get the data type from the common's pb + * @param typeAttributes the additional attributes of the type. + * @return A DataType + */ + public DataType getDataType(ColumnTypeAttributes typeAttributes) { + if (this == DECIMAL) { + return DecimalUtil.precisionToDataType(typeAttributes.getPrecision()); + } + return this.dataTypes.get(0); + } + + /** + * Get the string representation of this type + * @return The type's name + */ + public String getName() { + return this.name; + } + + /** + * The size of this type on the wire + * @return A size + * @deprecated use {@link #getSize(ColumnTypeAttributes)} + */ + @Deprecated + public int getSize() { + if (this == DECIMAL) { + throw new IllegalStateException("Please use the newer getSize(ColumnTypeAttributes) " + + "to support the Decimal data type"); + } + return this.size; + } + + /** + * The size of this type on the wire + * @param typeAttributes the additional attributes of the type. 
+ * @return A size + */ + public int getSize(ColumnTypeAttributes typeAttributes) { + if (this == DECIMAL) { + return DecimalUtil.precisionToSize(typeAttributes.getPrecision()); + } + return this.size; + } + + @Override + public String toString() { + return "Type: " + this.name; + } + + /** + * Gives the size in bytes for a given DataType, as per the pb specification + * @param type pb type + * @return size in bytes + */ + private static int getTypeSize(DataType type) { + switch (type) { + case STRING: + case BINARY: + case VARCHAR: + return 8 + 8; // offset then string length + case BOOL: + case INT8: + case IS_DELETED: + return 1; + case INT16: + return Shorts.BYTES; + case INT32: + case DATE: + case FLOAT: + return Ints.BYTES; + case INT64: + case DOUBLE: + case UNIXTIME_MICROS: + return Longs.BYTES; + default: throw new IllegalArgumentException( + "the provided data type doesn't map to any known one"); + } + } + + /** + * Convert the pb DataType to a Type + * @param type DataType to convert + * @return a matching Type + */ + public static Type getTypeForDataType(DataType type) { + switch (type) { + case STRING: + return STRING; + case BINARY: + return BINARY; + case VARCHAR: + return VARCHAR; + case BOOL: + case IS_DELETED: + return BOOL; + case INT8: + return INT8; + case INT16: + return INT16; + case INT32: + return INT32; + case INT64: + return INT64; + case UNIXTIME_MICROS: + return UNIXTIME_MICROS; + case FLOAT: + return FLOAT; + case DOUBLE: + return DOUBLE; + case DATE: + return DATE; + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return DECIMAL; + default: + throw new IllegalArgumentException("the provided data type doesn't map " + + "to any known one: " + type.getDescriptorForType().getFullName()); + } + } + + /** + * Create a Type from its name + * @param name The DataType name. It accepts Type name (from the getName() + * method) and ENUM name (from the name() method). + * @throws IllegalArgumentException if the provided name doesn't map to any + * known type. + * @return a matching Type. + */ + public static Type getTypeForName(String name) { + for (Type t : values()) { + if (t.name().equals(name) || t.getName().equals(name)) { + return t; + } + } + throw new IllegalArgumentException("The provided name doesn't map to any known type: " + name); + } + + /** + * @return true if this type has a pre-determined fixed size, false otherwise + */ + public boolean isFixedSize() { + return this != BINARY && this != STRING && this != VARCHAR; + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbortTransactionRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbortTransactionRequest.java new file mode 100644 index 0000000000..ded18ab7ab --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbortTransactionRequest.java @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.transactions.TxnManager.AbortTransactionResponsePB; + +import java.util.Collection; +import java.util.List; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.transactions.TxnManager; +import org.apache.kudu.util.Pair; + +/** + * A wrapper class for kudu.transactions.TxnManagerService.AbortTransaction RPC. + */ +@InterfaceAudience.Private +class AbortTransactionRequest extends KuduRpc { + private static final List featureFlags = ImmutableList.of(); + final long txnId; + + AbortTransactionRequest( + KuduTable masterTable, Timer timer, long timeoutMillis, long txnId) { + super(masterTable, timer, timeoutMillis); + Preconditions.checkArgument(txnId > AsyncKuduClient.INVALID_TXN_ID); + this.txnId = txnId; + } + + @Override + Message createRequestPB() { + final TxnManager.AbortTransactionRequestPB.Builder b = + TxnManager.AbortTransactionRequestPB.newBuilder(); + b.setTxnId(txnId); + return b.build(); + } + + @Override + String serviceName() { + return TXN_MANAGER_SERVICE_NAME; + } + + @Override + String method() { + return "AbortTransaction"; + } + + @Override + Pair deserialize( + final CallResponse callResponse, String serverUUID) throws KuduException { + final AbortTransactionResponsePB.Builder b = AbortTransactionResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), b); + AbortTransactionResponse response = new AbortTransactionResponse( + timeoutTracker.getElapsedMillis(), serverUUID); + return new Pair<>(response, b.hasError() ? b.getError() : null); + } + + @Override + Collection getRequiredFeatures() { + return featureFlags; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbortTransactionResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbortTransactionResponse.java new file mode 100644 index 0000000000..3fd7c7a55f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbortTransactionResponse.java @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
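+
+// Usage sketch: application code normally does not construct the abort RPC itself.
+// Aborting a transaction typically goes through the public transaction handle (assuming
+// the KuduTransaction API), and this response is what AbortTransactionRequest#deserialize()
+// produces once the RPC completes:
+//
+//   KuduTransaction txn = client.newTransaction();
+//   // ... stage operations through a session tied to txn ...
+//   txn.rollback();  // surfaces as an AbortTransaction RPC to the TxnManager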
+
+package org.apache.kudu.client;
+
+import com.google.common.base.Preconditions;
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+public class AbortTransactionResponse extends KuduRpcResponse {
+  /**
+   * @param elapsedMillis time in milliseconds since RPC creation to now
+   * @param serverUUID UUID of the server that sent the response
+   */
+  AbortTransactionResponse(long elapsedMillis, String serverUUID) {
+    super(elapsedMillis, serverUUID);
+  }
+}
diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbstractKuduScannerBuilder.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbstractKuduScannerBuilder.java
new file mode 100644
index 0000000000..db843efcad
--- /dev/null
+++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AbstractKuduScannerBuilder.java
@@ -0,0 +1,432 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.kudu.client;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+
+import org.apache.kudu.tserver.Tserver;
+import org.apache.kudu.util.HybridTimeUtil;
+
+/**
+ * Abstract class to extend in order to create builders for scanners.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public abstract class AbstractKuduScannerBuilder
+    <S extends AbstractKuduScannerBuilder<? super S, ? extends T>, T> {
+  final AsyncKuduClient client;
+  final KuduTable table;
+
+  /** Map of column name to predicate */
+  final Map<String, KuduPredicate> predicates = new HashMap<>();
+
+  AsyncKuduScanner.ReadMode readMode = AsyncKuduScanner.ReadMode.READ_LATEST;
+  boolean isFaultTolerant = false;
+  int batchSizeBytes = 1024 * 1024;
+  long limit = Long.MAX_VALUE;
+  boolean prefetching = false;
+  boolean cacheBlocks = true;
+  long startTimestamp = AsyncKuduClient.NO_TIMESTAMP;
+  long htTimestamp = AsyncKuduClient.NO_TIMESTAMP;
+  byte[] lowerBoundPrimaryKey = AsyncKuduClient.EMPTY_ARRAY;
+  byte[] upperBoundPrimaryKey = AsyncKuduClient.EMPTY_ARRAY;
+  byte[] lowerBoundPartitionKey = AsyncKuduClient.EMPTY_ARRAY;
+  byte[] upperBoundPartitionKey = AsyncKuduClient.EMPTY_ARRAY;
+  List<String> projectedColumnNames = null;
+  List<Integer> projectedColumnIndexes = null;
+  long scanRequestTimeout;
+  ReplicaSelection replicaSelection = ReplicaSelection.LEADER_ONLY;
+  long keepAlivePeriodMs = AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS;
+  String queryId = "";
+
+  AbstractKuduScannerBuilder(AsyncKuduClient client, KuduTable table) {
+    this.client = client;
+    this.table = table;
+    this.scanRequestTimeout = client.getDefaultOperationTimeoutMs();
+  }
+
+  /**
+   * Sets the read mode, the default is to read the latest values.
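+   * <p>
+   * For example, a snapshot read could be requested as follows (the client and table
+   * handles are assumed to exist):
+   * <pre>{@code
+   * KuduScanner scanner = client.newScannerBuilder(table)
+   *     .readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT)
+   *     .build();
+   * }</pre>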
+ * @param readMode a read mode for the scanner + * @return this instance + */ + @SuppressWarnings("unchecked") + public S readMode(AsyncKuduScanner.ReadMode readMode) { + this.readMode = readMode; + return (S) this; + } + + /** + * Make scans resumable at another tablet server if current server fails if + * isFaultTolerant is true. + *
+   * <p>
+ * Scans are by default non fault-tolerant, and scans will fail + * if scanning an individual tablet fails (for example, if a tablet server + * crashes in the middle of a tablet scan). If isFaultTolerant is set to true, + * scans will be resumed at another tablet server in the case of failure. + * + * Fault-tolerant scans typically have lower throughput than non + * fault-tolerant scans. Fault tolerant scans use READ_AT_SNAPSHOT read mode. + * If no snapshot timestamp is provided, the server will pick one. + * + * @param isFaultTolerant a boolean that indicates if scan is fault-tolerant + * @return this instance + */ + @SuppressWarnings("unchecked") + public S setFaultTolerant(boolean isFaultTolerant) { + this.isFaultTolerant = isFaultTolerant; + if (isFaultTolerant) { + this.readMode = AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT; + } + return (S) this; + } + + /** + * Adds a predicate for a column. + * @param predicate predicate for a column to add + * @return this instance + * @deprecated use {@link #addPredicate(KuduPredicate)} + */ + @Deprecated + public S addColumnRangePredicate(ColumnRangePredicate predicate) { + return addPredicate(predicate.toKuduPredicate()); + } + + /** + * Adds a list of predicates in their raw format, + * as given by {@link ColumnRangePredicate#toByteArray(List)}. + * @param predicateBytes predicates to add + * @return this instance + * @throws IllegalArgumentException thrown when the passed bytes aren't valid + * @deprecated use {@link #addPredicate} + */ + @Deprecated + @SuppressWarnings("unchecked") + public S addColumnRangePredicatesRaw(byte[] predicateBytes) { + for (Tserver.ColumnRangePredicatePB pb : ColumnRangePredicate.fromByteArray(predicateBytes)) { + addPredicate(ColumnRangePredicate.fromPb(pb).toKuduPredicate()); + } + return (S) this; + } + + /** + * Adds a predicate to the scan. + * @param predicate predicate to add + * @return this instance + */ + @SuppressWarnings("unchecked") + public S addPredicate(KuduPredicate predicate) { + String columnName = predicate.getColumn().getName(); + KuduPredicate existing = predicates.get(columnName); + if (existing != null) { + predicate = existing.merge(predicate); + } + + // KUDU-1652: Do not send an IS NOT NULL predicate to the server for a non-nullable column. + if (!predicate.getColumn().isNullable() && + predicate.getType() == KuduPredicate.PredicateType.IS_NOT_NULL) { + return (S) this; + } + + predicates.put(columnName, predicate); + return (S) this; + } + + /** + * Set which columns will be read by the Scanner. + * Calling this method after {@link #setProjectedColumnIndexes(List)} will reset the projected + * columns to those specified in {@code columnNames}. + * @param columnNames the names of columns to read, or 'null' to read all columns + * (the default) + */ + @SuppressWarnings("unchecked") + public S setProjectedColumnNames(List columnNames) { + projectedColumnIndexes = null; + if (columnNames != null) { + projectedColumnNames = ImmutableList.copyOf(columnNames); + } else { + projectedColumnNames = null; + } + return (S) this; + } + + /** + * Set which columns will be read by the Scanner. + * Calling this method after {@link #setProjectedColumnNames(List)} will reset the projected + * columns to those specified in {@code columnIndexes}. 
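+   * <p>
+   * As a sketch (the column name, indexes, and threshold are hypothetical), a projection
+   * can be combined with a predicate:
+   * <pre>{@code
+   * KuduScanner scanner = client.newScannerBuilder(table)
+   *     .setProjectedColumnIndexes(ImmutableList.of(0, 1))
+   *     .addPredicate(KuduPredicate.newComparisonPredicate(
+   *         table.getSchema().getColumn("amount"), KuduPredicate.ComparisonOp.GREATER, 100))
+   *     .build();
+   * }</pre>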
+ * @param columnIndexes the indexes of columns to read, or 'null' to read all columns + * (the default) + */ + @SuppressWarnings("unchecked") + public S setProjectedColumnIndexes(List columnIndexes) { + projectedColumnNames = null; + if (columnIndexes != null) { + projectedColumnIndexes = ImmutableList.copyOf(columnIndexes); + } else { + projectedColumnIndexes = null; + } + return (S) this; + } + + /** + * Sets the maximum number of bytes returned by the scanner, on each batch. The default is 1MB. + *
+   * <p>
+ * Kudu may actually return more than this many bytes because it will not + * truncate a rowResult in the middle. + * @param batchSizeBytes a strictly positive number of bytes + * @return this instance + */ + @SuppressWarnings("unchecked") + public S batchSizeBytes(int batchSizeBytes) { + this.batchSizeBytes = batchSizeBytes; + return (S) this; + } + + /** + * Sets a limit on the number of rows that will be returned by the scanner. There's no limit + * by default. + * + * @param limit a positive long + * @return this instance + */ + @SuppressWarnings("unchecked") + public S limit(long limit) { + this.limit = limit; + return (S) this; + } + + /** + * Enables prefetching of rows for the scanner, i.e. whether to send a request for more data + * to the server immediately after we receive a response (instead of waiting for the user + * to call {@code nextRows()}). Disabled by default. + * NOTE: This is risky until KUDU-1260 is resolved. + * @param prefetching a boolean that indicates if the scanner should prefetch rows + * @return this instance + */ + @SuppressWarnings("unchecked") + public S prefetching(boolean prefetching) { + this.prefetching = prefetching; + return (S) this; + } + + /** + * Sets the block caching policy for the scanner. If true, scanned data blocks will be cached + * in memory and made available for future scans. Enabled by default. + * @param cacheBlocks a boolean that indicates if data blocks should be cached or not + * @return this instance + */ + @SuppressWarnings("unchecked") + public S cacheBlocks(boolean cacheBlocks) { + this.cacheBlocks = cacheBlocks; + return (S) this; + } + + /** + * Sets a previously encoded HT timestamp as a snapshot timestamp, for tests. None is used by + * default. + * Requires that the ReadMode is READ_AT_SNAPSHOT. + * @param htTimestamp a long representing a HybridTime-encoded timestamp + * @return this instance + * @throws IllegalArgumentException on build(), if the timestamp is less than 0 or if the + * read mode was not set to READ_AT_SNAPSHOT + */ + @InterfaceAudience.Private + @SuppressWarnings("unchecked") + public S snapshotTimestampRaw(long htTimestamp) { + this.htTimestamp = htTimestamp; + return (S) this; + } + + /** + * Sets the timestamp the scan must be executed at, in microseconds since the Unix epoch. None is + * used by default. + * Requires that the ReadMode is READ_AT_SNAPSHOT. + * @param timestamp a long representing an instant in microseconds since the unix epoch. + * @return this instance + * @throws IllegalArgumentException on build(), if the timestamp is less than 0 or if the + * read mode was not set to READ_AT_SNAPSHOT + */ + @SuppressWarnings("unchecked") + public S snapshotTimestampMicros(long timestamp) { + this.htTimestamp = HybridTimeUtil.physicalAndLogicalToHTTimestamp(timestamp, 0); + return (S) this; + } + + /** + * Sets the start timestamp and end timestamp for a diff scan. + * The timestamps should be encoded HT timestamps. + * + * Additionally sets any other scan properties required by diff scans. 
+ * + * @param startTimestamp a long representing a HybridTime-encoded start timestamp + * @param endTimestamp a long representing a HybridTime-encoded end timestamp + * @return this instance + */ + @InterfaceAudience.Private + @SuppressWarnings("unchecked") + public S diffScan(long startTimestamp, long endTimestamp) { + this.startTimestamp = startTimestamp; + this.htTimestamp = endTimestamp; + this.isFaultTolerant = true; + this.readMode = AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT; + return (S) this; + } + + /** + * Sets how long each scan request to a server can last. + * Defaults to {@link KuduClient#getDefaultOperationTimeoutMs()}. + * @param scanRequestTimeout a long representing time in milliseconds + * @return this instance + */ + @SuppressWarnings("unchecked") + public S scanRequestTimeout(long scanRequestTimeout) { + this.scanRequestTimeout = scanRequestTimeout; + return (S) this; + } + + /** + * Add a lower bound (inclusive) primary key for the scan. + * If any bound is already added, this bound is intersected with that one. + * @param partialRow a partial row with specified key columns + * @return this instance + */ + public S lowerBound(PartialRow partialRow) { + return lowerBoundRaw(partialRow.encodePrimaryKey()); + } + + /** + * Like lowerBoundPrimaryKey() but the encoded primary key is an opaque byte + * array obtained elsewhere. + * @param startPrimaryKey bytes containing an encoded start key + * @return this instance + * @deprecated use {@link #lowerBound(PartialRow)} + */ + @Deprecated + @SuppressWarnings("unchecked") + public S lowerBoundRaw(byte[] startPrimaryKey) { + if (lowerBoundPrimaryKey.length == 0 || + Bytes.memcmp(startPrimaryKey, lowerBoundPrimaryKey) > 0) { + this.lowerBoundPrimaryKey = startPrimaryKey; + } + return (S) this; + } + + /** + * Add an upper bound (exclusive) primary key for the scan. + * If any bound is already added, this bound is intersected with that one. + * @param partialRow a partial row with specified key columns + * @return this instance + */ + public S exclusiveUpperBound(PartialRow partialRow) { + return exclusiveUpperBoundRaw(partialRow.encodePrimaryKey()); + } + + /** + * Like exclusiveUpperBound() but the encoded primary key is an opaque byte + * array obtained elsewhere. + * @param endPrimaryKey bytes containing an encoded end key + * @return this instance + * @deprecated use {@link #exclusiveUpperBound(PartialRow)} + */ + @Deprecated + @SuppressWarnings("unchecked") + public S exclusiveUpperBoundRaw(byte[] endPrimaryKey) { + if (upperBoundPrimaryKey.length == 0 || + Bytes.memcmp(endPrimaryKey, upperBoundPrimaryKey) < 0) { + this.upperBoundPrimaryKey = endPrimaryKey; + } + return (S) this; + } + + /** + * Sets the replica selection mechanism for this scanner. The default is to read from the + * currently known leader. + * @param replicaSelection replication selection mechanism to use + * @return this instance + */ + @SuppressWarnings("unchecked") + public S replicaSelection(ReplicaSelection replicaSelection) { + this.replicaSelection = replicaSelection; + return (S) this; + } + + /** + * Set an encoded (inclusive) start partition key for the scan. + * + * @param partitionKey the encoded partition key + * @return this instance + */ + @SuppressWarnings("unchecked") + S lowerBoundPartitionKeyRaw(byte[] partitionKey) { + if (Bytes.memcmp(partitionKey, lowerBoundPartitionKey) > 0) { + this.lowerBoundPartitionKey = partitionKey; + } + return (S) this; + } + + /** + * Set an encoded (exclusive) end partition key for the scan. 
+ * + * @param partitionKey the encoded partition key + * @return this instance + */ + @SuppressWarnings("unchecked") + S exclusiveUpperBoundPartitionKeyRaw(byte[] partitionKey) { + if (upperBoundPartitionKey.length == 0 || + Bytes.memcmp(partitionKey, upperBoundPartitionKey) < 0) { + this.upperBoundPartitionKey = partitionKey; + } + return (S) this; + } + + /** + * Set the period at which to send keep-alive requests to the tablet + * server to ensure that this scanner will not time out. + * + * @param keepAlivePeriodMs the keep alive period in milliseconds + * @return this instance + */ + @SuppressWarnings("unchecked") + public S keepAlivePeriodMs(long keepAlivePeriodMs) { + this.keepAlivePeriodMs = keepAlivePeriodMs; + return (S) this; + } + + /** + * Set a query id for the scan to trace the whole scanning process. + * Query id is posted by the user or generated automatically by the + * client library code. It is used to trace the whole query process + * for debugging. + * + * @param queryId query id to trace a query. + * @return this instance + */ + @SuppressWarnings("unchecked") + public S setQueryId(String queryId) { + this.queryId = queryId; + return (S) this; + } + + public abstract T build(); +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableOptions.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableOptions.java new file mode 100644 index 0000000000..f3d7e4637f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableOptions.java @@ -0,0 +1,605 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.ColumnSchema.CompressionAlgorithm; +import static org.apache.kudu.ColumnSchema.Encoding; +import static org.apache.kudu.master.Master.AlterTableRequestPB; + +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.ByteString; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.ProtobufHelper.SchemaPBConversionFlags; +import org.apache.kudu.master.Master; + +/** + * This builder must be used to alter a table. At least one change must be specified. 
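+ * <p>
+ * A minimal sketch (the table and column names are hypothetical):
+ * <pre>{@code
+ * client.alterTable("metrics", new AlterTableOptions()
+ *     .addNullableColumn("notes", Type.STRING)
+ *     .renameColumn("val", "value"));
+ * }</pre>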
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class AlterTableOptions { + private final AlterTableRequestPB.Builder pb = AlterTableRequestPB.newBuilder(); + private boolean wait = true; + private boolean isAddingRangeWithCustomHashSchema = false; + + /** + * Change a table's name. + * @param newName new table's name, must be used to check progress + * @return this instance + */ + public AlterTableOptions renameTable(String newName) { + pb.setNewTableName(newName); + return this; + } + + /** + * Change a table's owner. + * @param owner the new table owner + * @return this instance + */ + public AlterTableOptions setOwner(String owner) { + pb.setNewTableOwner(owner); + return this; + } + + /** + * Change a table's comment. + * @param comment the new table comment + * @return this instance + */ + public AlterTableOptions setComment(String comment) { + pb.setNewTableComment(comment); + return this; + } + + /** + * Add a new column. + * @param colSchema the schema of the new column + * @return this instance + */ + public AlterTableOptions addColumn(ColumnSchema colSchema) { + if (colSchema.getName().equalsIgnoreCase(Schema.getAutoIncrementingColumnName())) { + throw new IllegalArgumentException("Column name " + + Schema.getAutoIncrementingColumnName() + " is reserved by Kudu engine"); + } + if (!colSchema.isNullable() && colSchema.getDefaultValue() == null) { + throw new IllegalArgumentException("A new non-null column must have a default value"); + } + if (colSchema.isKey()) { + throw new IllegalArgumentException("Key columns cannot be added"); + } + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ADD_COLUMN); + step.setAddColumn(AlterTableRequestPB.AddColumn.newBuilder() + .setSchema(ProtobufHelper.columnToPb(colSchema))); + return this; + } + + /** + * Add a new column that's not nullable. + * @param name name of the new column + * @param type type of the new column + * @param defaultVal default value used for the currently existing rows + * @return this instance + */ + public AlterTableOptions addColumn(String name, Type type, Object defaultVal) { + return addColumn(new ColumnSchema.ColumnSchemaBuilder(name, type) + .defaultValue(defaultVal) + .build()); + } + + /** + * Add a new column that's nullable and has no default value. + * @param name name of the new column + * @param type type of the new column + * @return this instance + */ + public AlterTableOptions addNullableColumn(String name, Type type) { + return addNullableColumn(name, type, null); + } + + /** + * Add a new column that's nullable. + * @param name name of the new column + * @param type type of the new column + * @param defaultVal the default value of the new column + * @return this instance + */ + public AlterTableOptions addNullableColumn(String name, Type type, Object defaultVal) { + return addColumn(new ColumnSchema.ColumnSchemaBuilder(name, type) + .nullable(true) + .defaultValue(defaultVal) + .build()); + } + + /** + * Drop a column. 
+ * @param name name of the column + * @return this instance + */ + public AlterTableOptions dropColumn(String name) { + if (name.equalsIgnoreCase(Schema.getAutoIncrementingColumnName())) { + throw new IllegalArgumentException("Cannot remove auto-incrementing column " + + Schema.getAutoIncrementingColumnName()); + } + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.DROP_COLUMN); + step.setDropColumn(AlterTableRequestPB.DropColumn.newBuilder().setName(name)); + return this; + } + + /** + * Change the name of a column. + * @param oldName old column's name, must exist + * @param newName new name to use + * @return this instance + */ + public AlterTableOptions renameColumn(String oldName, String newName) { + if (oldName.equalsIgnoreCase(Schema.getAutoIncrementingColumnName())) { + throw new IllegalArgumentException("Cannot rename auto-incrementing column " + + Schema.getAutoIncrementingColumnName()); + } + // For backwards compatibility, this uses the RENAME_COLUMN step type. + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.RENAME_COLUMN); + step.setRenameColumn(AlterTableRequestPB.RenameColumn.newBuilder().setOldName(oldName) + .setNewName(newName)); + return this; + } + + /** + * Remove the default value for a column. + * @param name name of the column + * @return this instance + */ + public AlterTableOptions removeDefault(String name) { + if (name.equalsIgnoreCase(Schema.getAutoIncrementingColumnName())) { + throw new IllegalArgumentException("Auto-incrementing column " + + Schema.getAutoIncrementingColumnName() + " does not have default value"); + } + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ALTER_COLUMN); + AlterTableRequestPB.AlterColumn.Builder alterBuilder = + AlterTableRequestPB.AlterColumn.newBuilder(); + alterBuilder.setDelta( + Common.ColumnSchemaDeltaPB.newBuilder().setName(name).setRemoveDefault(true)); + step.setAlterColumn(alterBuilder); + return this; + } + + /** + * Change the default value for a column. `newDefault` must not be null or + * else throws {@link IllegalArgumentException}. + * @param name name of the column + * @param newDefault the new default value + * @return this instance + */ + public AlterTableOptions changeDefault(String name, Object newDefault) { + if (name.equalsIgnoreCase(Schema.getAutoIncrementingColumnName())) { + throw new IllegalArgumentException("Cannot set default value for " + + "auto-incrementing column " + Schema.getAutoIncrementingColumnName()); + } + if (newDefault == null) { + throw new IllegalArgumentException("newDefault cannot be null: " + + "use removeDefault to clear a default value"); + } + + ByteString defaultByteString = ProtobufHelper.objectToByteStringNoType(name, newDefault); + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ALTER_COLUMN); + AlterTableRequestPB.AlterColumn.Builder alterBuilder = + AlterTableRequestPB.AlterColumn.newBuilder(); + alterBuilder.setDelta( + Common.ColumnSchemaDeltaPB.newBuilder().setName(name) + .setDefaultValue(defaultByteString)); + step.setAlterColumn(alterBuilder); + return this; + } + + /** + * Change the block size of a column's storage. A nonpositive value indicates + * a server-side default. 
+ * @param name name of the column + * @param blockSize the new block size + * @return this instance + */ + public AlterTableOptions changeDesiredBlockSize(String name, int blockSize) { + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ALTER_COLUMN); + AlterTableRequestPB.AlterColumn.Builder alterBuilder = + AlterTableRequestPB.AlterColumn.newBuilder(); + alterBuilder.setDelta( + Common.ColumnSchemaDeltaPB.newBuilder().setName(name).setBlockSize(blockSize)); + step.setAlterColumn(alterBuilder); + return this; + } + + /** + * Change the encoding used for a column. + * @param name name of the column + * @param encoding the new encoding + * @return this instance + */ + public AlterTableOptions changeEncoding(String name, Encoding encoding) { + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ALTER_COLUMN); + AlterTableRequestPB.AlterColumn.Builder alterBuilder = + AlterTableRequestPB.AlterColumn.newBuilder(); + alterBuilder.setDelta( + Common.ColumnSchemaDeltaPB.newBuilder().setName(name) + .setEncoding(encoding.getInternalPbType())); + step.setAlterColumn(alterBuilder); + return this; + } + + /** + * Change the compression used for a column. + * @param name the name of the column + * @param ca the new compression algorithm + * @return this instance + */ + public AlterTableOptions changeCompressionAlgorithm(String name, CompressionAlgorithm ca) { + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ALTER_COLUMN); + AlterTableRequestPB.AlterColumn.Builder alterBuilder = + AlterTableRequestPB.AlterColumn.newBuilder(); + alterBuilder.setDelta( + Common.ColumnSchemaDeltaPB.newBuilder().setName(name) + .setCompression(ca.getInternalPbType())); + step.setAlterColumn(alterBuilder); + return this; + } + + /** + * Add a range partition to the table with an inclusive lower bound and an exclusive upper bound. + * + * If either row is empty, then that end of the range will be unbounded. If a range column is + * missing a value, the logical minimum value for that column type will be used as the default. + * + * Multiple range partitions may be added as part of a single alter table transaction by calling + * this method multiple times. Added range partitions must not overlap with each + * other or any existing range partitions (unless the existing range partitions are dropped as + * part of the alter transaction first). The lower bound must be less than the upper bound. + * + * This client will immediately be able to write and scan the new tablets when the alter table + * operation returns success, however other existing clients may have to wait for a timeout period + * to elapse before the tablets become visible. This period is configured by the master's + * 'table_locations_ttl_ms' flag, and defaults to 5 minutes. + * + * @param lowerBound inclusive lower bound, may be empty but not null + * @param upperBound exclusive upper bound, may be empty but not null + * @return this instance + */ + public AlterTableOptions addRangePartition(PartialRow lowerBound, PartialRow upperBound) { + return addRangePartition(lowerBound, upperBound, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + } + + /** + * Add a range partition to the table with a lower bound and upper bound. + * + * If either row is empty, then that end of the range will be unbounded. 
If a range column is + * missing a value, the logical minimum value for that column type will be used as the default. + * + * Multiple range partitions may be added as part of a single alter table transaction by calling + * this method multiple times. Added range partitions must not overlap with each + * other or any existing range partitions (unless the existing range partitions are dropped as + * part of the alter transaction first). The lower bound must be less than the upper bound. + * + * This client will immediately be able to write and scan the new tablets when the alter table + * operation returns success, however other existing clients may have to wait for a timeout period + * to elapse before the tablets become visible. This period is configured by the master's + * 'table_locations_ttl_ms' flag, and defaults to 5 minutes. + * + * @param lowerBound lower bound, may be empty but not null + * @param upperBound upper bound, may be empty but not null + * @param lowerBoundType the type of the lower bound, either inclusive or exclusive + * @param upperBoundType the type of the upper bound, either inclusive or exclusive + * @return this instance + */ + public AlterTableOptions addRangePartition(PartialRow lowerBound, + PartialRow upperBound, + RangePartitionBound lowerBoundType, + RangePartitionBound upperBoundType) { + return addRangePartition(lowerBound, upperBound, null, lowerBoundType, upperBoundType); + } + + /** + * Add a range partition to the table with dimension label. + * + * If either row is empty, then that end of the range will be unbounded. If a range column is + * missing a value, the logical minimum value for that column type will be used as the default. + * + * Multiple range partitions may be added as part of a single alter table transaction by calling + * this method multiple times. Added range partitions must not overlap with each + * other or any existing range partitions (unless the existing range partitions are dropped as + * part of the alter transaction first). The lower bound must be less than the upper bound. + * + * This client will immediately be able to write and scan the new tablets when the alter table + * operation returns success, however other existing clients may have to wait for a timeout period + * to elapse before the tablets become visible. This period is configured by the master's + * 'table_locations_ttl_ms' flag, and defaults to 5 minutes. + * + * By default, the master will try to place newly created tablet replicas on tablet + * servers with a small number of tablet replicas. If the dimension label is provided, + * newly created replicas will be evenly distributed in the cluster based on the dimension + * label. In other words, the master will try to place newly created tablet replicas on + * tablet servers with a small number of tablet replicas belonging to this dimension label. 
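As a hedged sketch of adding a range partition, assuming the KuduClient named client from the earlier sketch and a table range partitioned on a single INT64 column ts (all names hypothetical, exception handling omitted):

[source,java]
----
KuduTable table = client.openTable("metrics");
Schema schema = table.getSchema();

// New partition covering [1000, 2000); an empty row would leave that end unbounded.
PartialRow lower = schema.newPartialRow();
lower.addLong("ts", 1000L);
PartialRow upper = schema.newPartialRow();
upper.addLong("ts", 2000L);

client.alterTable("metrics", new AlterTableOptions().addRangePartition(lower, upper));
----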
+ * + * @param lowerBound lower bound, may be empty but not null + * @param upperBound upper bound, may be empty but not null + * @param dimensionLabel the dimension label for the tablet to be created + * @param lowerBoundType the type of the lower bound, either inclusive or exclusive + * @param upperBoundType the type of the upper bound, either inclusive or exclusive + * @return this instance + */ + public AlterTableOptions addRangePartition(PartialRow lowerBound, + PartialRow upperBound, + String dimensionLabel, + RangePartitionBound lowerBoundType, + RangePartitionBound upperBoundType) { + Preconditions.checkNotNull(lowerBound); + Preconditions.checkNotNull(upperBound); + Preconditions.checkArgument(lowerBound.getSchema().equals(upperBound.getSchema())); + + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ADD_RANGE_PARTITION); + AlterTableRequestPB.AddRangePartition.Builder builder = + AlterTableRequestPB.AddRangePartition.newBuilder(); + builder.setRangeBounds( + new Operation.OperationsEncoder() + .encodeLowerAndUpperBounds(lowerBound, upperBound, lowerBoundType, upperBoundType)); + if (dimensionLabel != null) { + builder.setDimensionLabel(dimensionLabel); + } + step.setAddRangePartition(builder); + if (!pb.hasSchema()) { + pb.setSchema(ProtobufHelper.schemaToPb(lowerBound.getSchema(), + EnumSet.of(SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_COMMENT, + SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_ID))); + } + return this; + } + + /** + * Similar to the other addRangePartition() methods, but instead of adding a + * range with table-wide hash schema, this method adds a range with + * custom hash schema. + * + * @param range the range with custom hash schema + * @return this instance + */ + public AlterTableOptions addRangePartition(RangePartitionWithCustomHashSchema range) { + Preconditions.checkNotNull(range); + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ADD_RANGE_PARTITION); + AlterTableRequestPB.AddRangePartition.Builder rangeBuilder = + AlterTableRequestPB.AddRangePartition.newBuilder(); + rangeBuilder.setRangeBounds( + new Operation.OperationsEncoder().encodeLowerAndUpperBounds( + range.getLowerBound(), range.getUpperBound(), + range.getLowerBoundType(), range.getUpperBoundType())); + rangeBuilder.getCustomHashSchemaBuilder().addAllHashSchema( + range.toPB().getHashSchemaList()); + step.setAddRangePartition(rangeBuilder); + if (!pb.hasSchema()) { + pb.setSchema(ProtobufHelper.schemaToPb(range.getLowerBound().getSchema(), + EnumSet.of(SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_COMMENT, + SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_ID))); + } + isAddingRangeWithCustomHashSchema = true; + return this; + } + + /** + * Drop the range partition from the table with the specified inclusive lower bound and exclusive + * upper bound. The bounds must match exactly, and may not span multiple range partitions. + * + * If either row is empty, then that end of the range will be unbounded. If a range column is + * missing a value, the logical minimum value for that column type will be used as the default. + * + * Multiple range partitions may be dropped as part of a single alter table transaction by calling + * this method multiple times. 
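Dropping a range partition mirrors the addition. The bounds below are assumed to match an existing [1000, 2000) partition on the same hypothetical ts column:

[source,java]
----
// The bounds must exactly match one existing range partition.
PartialRow dropLower = schema.newPartialRow();
dropLower.addLong("ts", 1000L);
PartialRow dropUpper = schema.newPartialRow();
dropUpper.addLong("ts", 2000L);

client.alterTable("metrics", new AlterTableOptions().dropRangePartition(dropLower, dropUpper));
----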
+ * + * @param lowerBound inclusive lower bound, can be empty but not null + * @param upperBound exclusive upper bound, can be empty but not null + * @return this instance + */ + public AlterTableOptions dropRangePartition(PartialRow lowerBound, PartialRow upperBound) { + return dropRangePartition(lowerBound, upperBound, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + } + + /** + * Drop the range partition from the table with the specified lower bound and upper bound. + * The bounds must match exactly, and may not span multiple range partitions. + * + * If either row is empty, then that end of the range will be unbounded. If a range column is + * missing a value, the logical minimum value for that column type will be used as the default. + * + * Multiple range partitions may be dropped as part of a single alter table transaction by calling + * this method multiple times. + * + * @param lowerBound inclusive lower bound, can be empty but not null + * @param upperBound exclusive upper bound, can be empty but not null + * @param lowerBoundType the type of the lower bound, either inclusive or exclusive + * @param upperBoundType the type of the upper bound, either inclusive or exclusive + * @return this instance + */ + public AlterTableOptions dropRangePartition(PartialRow lowerBound, + PartialRow upperBound, + RangePartitionBound lowerBoundType, + RangePartitionBound upperBoundType) { + Preconditions.checkNotNull(lowerBound); + Preconditions.checkNotNull(upperBound); + Preconditions.checkArgument(lowerBound.getSchema().equals(upperBound.getSchema())); + + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.DROP_RANGE_PARTITION); + AlterTableRequestPB.DropRangePartition.Builder builder = + AlterTableRequestPB.DropRangePartition.newBuilder(); + builder.setRangeBounds( + new Operation.OperationsEncoder().encodeLowerAndUpperBounds(lowerBound, upperBound, + lowerBoundType, + upperBoundType)); + step.setDropRangePartition(builder); + if (!pb.hasSchema()) { + pb.setSchema(ProtobufHelper.schemaToPb(lowerBound.getSchema(), + EnumSet.of(SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_COMMENT, + SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_ID))); + } + return this; + } + + /** + * Change the comment for the column. + * + * @param name name of the column + * @param comment the new comment for the column, an empty comment means + * deleting an existing comment. + * @return this instance + */ + public AlterTableOptions changeComment(String name, String comment) { + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ALTER_COLUMN); + AlterTableRequestPB.AlterColumn.Builder alterBuilder = + AlterTableRequestPB.AlterColumn.newBuilder(); + alterBuilder.setDelta( + Common.ColumnSchemaDeltaPB.newBuilder().setName(name).setNewComment(comment)); + step.setAlterColumn(alterBuilder); + return this; + } + + /** + * Change the immutable attribute for the column. + * + * @param name name of the column + * @param immutable the new immutable attribute for the column. 
+ * @return this instance + */ + public AlterTableOptions changeImmutable(String name, boolean immutable) { + if (name.equalsIgnoreCase(Schema.getAutoIncrementingColumnName())) { + throw new IllegalArgumentException("Cannot change immutable for " + + "auto-incrementing column " + Schema.getAutoIncrementingColumnName()); + } + AlterTableRequestPB.Step.Builder step = pb.addAlterSchemaStepsBuilder(); + step.setType(AlterTableRequestPB.StepType.ALTER_COLUMN); + AlterTableRequestPB.AlterColumn.Builder alterBuilder = + AlterTableRequestPB.AlterColumn.newBuilder(); + alterBuilder.setDelta( + Common.ColumnSchemaDeltaPB.newBuilder().setName(name).setImmutable(immutable)); + step.setAlterColumn(alterBuilder); + return this; + } + + /** + * Change the table's extra configuration properties. + * These configuration properties will be merged into existing configuration properties. + * + * If the value of the kv pair is empty, the property will be unset. + * + * @param extraConfig the table's extra configuration properties + * @return this instance + */ + public AlterTableOptions alterExtraConfigs(Map extraConfig) { + pb.putAllNewExtraConfigs(extraConfig); + return this; + } + + /** + * Whether to wait for the table to be fully altered before this alter + * operation is considered to be finished. + *

+ * If false, the alter will finish quickly, but a subsequent
+ * {@link KuduClient#openTable(String)} may return a {@link KuduTable} with
+ * an out-of-date schema.
+ * <p>
+ * If true, the alter will take longer, but the very next schema is guaranteed
+ * to be up-to-date.
+ * <p>
+ * If not provided, defaults to true.
+ * <p>
+ * @param wait whether to wait for the table to be fully altered + * @return this instance + */ + public AlterTableOptions setWait(boolean wait) { + this.wait = wait; + return this; + } + + /** + * @return {@code true} if the alter table operation includes an add or drop partition operation + */ + @InterfaceAudience.Private + boolean hasAddDropRangePartitions() { + return pb.hasSchema(); + } + + /** + * @return the AlterTableRequest protobuf message. + */ + AlterTableRequestPB.Builder getProtobuf() { + return pb; + } + + boolean shouldWait() { + return wait; + } + + List getRequiredFeatureFlags() { + boolean hasImmutables = false; + for (AlterTableRequestPB.Step.Builder step : pb.getAlterSchemaStepsBuilderList()) { + if ((step.getType() == AlterTableRequestPB.StepType.ADD_COLUMN && + step.getAddColumn().getSchema().hasImmutable()) || + (step.getType() == AlterTableRequestPB.StepType.ALTER_COLUMN && + step.getAlterColumn().getDelta().hasImmutable())) { + hasImmutables = true; + break; + } + } + + List requiredFeatureFlags = new ArrayList<>(); + if (hasImmutables) { + requiredFeatureFlags.add( + Integer.valueOf(Master.MasterFeatures.IMMUTABLE_COLUMN_ATTRIBUTE_VALUE)); + } + + if (hasAddDropRangePartitions()) { + requiredFeatureFlags.add(Integer.valueOf(Master.MasterFeatures.RANGE_PARTITION_BOUNDS_VALUE)); + if (isAddingRangeWithCustomHashSchema) { + requiredFeatureFlags.add( + Integer.valueOf(Master.MasterFeatures.RANGE_SPECIFIC_HASH_SCHEMA_VALUE)); + } + } + return requiredFeatureFlags; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableRequest.java new file mode 100644 index 0000000000..a27e010480 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableRequest.java @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.master.Master.AlterTableRequestPB; +import static org.apache.kudu.master.Master.AlterTableResponsePB; +import static org.apache.kudu.master.Master.MasterFeatures; +import static org.apache.kudu.master.Master.TableIdentifierPB; + +import java.util.Collection; +import java.util.List; + +import com.google.common.collect.ImmutableList; +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.util.Pair; + +/** + * RPC used to alter a table. When it returns it doesn't mean that the table is altered, + * a success just means that the master accepted it. 
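To make the accepted-versus-applied distinction concrete, a hedged sketch of submitting an alter without waiting and then polling for completion from the synchronous client; the table name is hypothetical and exception handling is omitted:

[source,java]
----
// Return as soon as the master accepts the request.
client.alterTable("metrics",
    new AlterTableOptions().dropColumn("obsolete_col").setWait(false));

// Poll until every tablet has picked up the new schema.
while (!client.isAlterTableDone("metrics")) {
  Thread.sleep(1000);
}
----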
+ */ +@InterfaceAudience.Private +class AlterTableRequest extends KuduRpc { + + static final String ALTER_TABLE = "AlterTable"; + private final String name; + private final AlterTableRequestPB.Builder builder; + private final List featureFlags; + + AlterTableRequest(KuduTable masterTable, + String name, + AlterTableOptions ato, + Timer timer, + long timeoutMillis) { + super(masterTable, timer, timeoutMillis); + this.name = name; + this.builder = ato.getProtobuf(); + this.featureFlags = ato.getRequiredFeatureFlags(); + } + + @Override + Message createRequestPB() { + TableIdentifierPB tableID = TableIdentifierPB.newBuilder().setTableName(name).build(); + this.builder.setTable(tableID); + return this.builder.build(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return ALTER_TABLE; + } + + @Override + Pair deserialize(final CallResponse callResponse, + String tsUUID) throws KuduException { + final AlterTableResponsePB.Builder respBuilder = AlterTableResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + AlterTableResponse response = new AlterTableResponse( + timeoutTracker.getElapsedMillis(), + tsUUID, + respBuilder.hasTableId() ? respBuilder.getTableId().toStringUtf8() : null); + + return new Pair( + response, respBuilder.hasError() ? respBuilder.getError() : null); + } + + @Override + Collection getRequiredFeatures() { + return featureFlags; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableResponse.java new file mode 100644 index 0000000000..bc4e9cb8a4 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AlterTableResponse.java @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class AlterTableResponse extends KuduRpcResponse { + + private String tableId; + + /** + * @param elapsedMillis Time in milliseconds since RPC creation to now. 
+ */ + AlterTableResponse(long elapsedMillis, String tsUUID, String tableId) { + super(elapsedMillis, tsUUID); + this.tableId = tableId; + } + + /** + * @return the ID of the altered table + */ + public String getTableId() { + return tableId; + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java new file mode 100644 index 0000000000..84ffedf975 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java @@ -0,0 +1,3112 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.apache.kudu.client; + +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static org.apache.kudu.client.ExternalConsistencyMode.CLIENT_PROPAGATED; +import static org.apache.kudu.rpc.RpcHeader.ErrorStatusPB.RpcErrorCodePB.ERROR_INVALID_REQUEST; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.security.cert.CertificateException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import javax.annotation.concurrent.GuardedBy; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import com.google.protobuf.ByteString; +import com.google.protobuf.Message; +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import io.netty.bootstrap.Bootstrap; +import io.netty.buffer.PooledByteBufAllocator; +import io.netty.channel.ChannelOption; +import io.netty.channel.EventLoopGroup; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.channel.socket.nio.NioSocketChannel; +import io.netty.util.HashedWheelTimer; +import io.netty.util.Timeout; +import io.netty.util.Timer; +import io.netty.util.TimerTask; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.client.Client.AuthenticationCredentialsPB; +import org.apache.kudu.master.Master; +import org.apache.kudu.master.Master.GetTableLocationsResponsePB; +import org.apache.kudu.master.Master.TSInfoPB; +import org.apache.kudu.master.Master.TableIdentifierPB; +import org.apache.kudu.master.Master.TabletLocationsPB; +import org.apache.kudu.security.Token; +import org.apache.kudu.security.Token.SignedTokenPB; +import org.apache.kudu.util.AsyncUtil; +import org.apache.kudu.util.NetUtil; +import org.apache.kudu.util.Pair; + +/** + * A fully asynchronous and thread-safe client for Kudu. + *

+ * A single Kudu client instance corresponds to a single remote Kudu cluster, + * and can be used to read or write any number of tables within that cluster. + * An application should use exactly one Kudu client instance per distinct Kudu + * cluster it connects to. + * + * In rare cases where a single application needs multiple instances connected + * to the same cluster, or when many applications each using one or more Kudu + * client instances are running on the same machine, it may be necessary to + * adjust the instances to use less resources. See the options in + * {@link AsyncKuduClientBuilder}. + * + *

+ * <h1>Creating a client instance</h1>
An {@link AsyncKuduClient} instance may + * be created using the {@link AsyncKuduClient.AsyncKuduClientBuilder} class. If + * a synchronous API is preferred, {@link KuduClient.KuduClientBuilder} may be + * used instead. See the documentation on these classes for more details on + * client configuration options. + * + *
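A minimal sketch of building each flavor of client; the master addresses and timeout values are placeholders:

[source,java]
----
// Synchronous client.
KuduClient syncClient = new KuduClient.KuduClientBuilder("master1:7051,master2:7051")
    .defaultOperationTimeoutMs(60000)
    .build();

// Fully asynchronous client.
AsyncKuduClient asyncClient = new AsyncKuduClient.AsyncKuduClientBuilder("master1:7051")
    .defaultAdminOperationTimeoutMs(30000)
    .build();
----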

+ * <h1>Authenticating to a secure cluster</h1>
A Kudu cluster may be configured + * such that it requires clients to connect using strong authentication. Clients + * can authenticate to such clusters using either of two methods: + *
+ * <ol>
+ *   <li>Kerberos credentials</li>
+ *   <li>Authentication tokens</li>
+ * </ol>
+ * + * In a typical environment, Kerberos credentials are used for non-distributed + * client applications and for applications which spawn distributed + * jobs. Tokens are used for the tasks of distributed jobs, since those + * tasks do not have access to the user's Kerberos credentials. + * + *

+ * <h1>Authenticating using Kerberos credentials</h1>
+ * + * In order to integrate with Kerberos, Kudu uses the standard Java + * Authentication and Authorization Service (JAAS) API provided by the JDK. + * JAAS provides a common way for applications to initialize Kerberos + * credentials, store these credentials in a {@link javax.security.auth.Subject} + * instance, and associate the Subject with the current thread of execution. + * The Kudu client then accesses the Kerberos credentials in the + * {@link javax.security.auth.Subject} and uses them to authenticate to the + * remote cluster as necessary. + *

+ * Kerberos credentials are typically obtained in one of two ways: + *

+ * <ol>
+ *   <li>The Kerberos ticket cache</li>
+ *   <li>A keytab file</li>
+ * </ol>
+ * + *

+ * <h1>Authenticating from the Kerberos ticket cache</h1>
+ * + * The Kerberos ticket cache is a file stored on the local file system + * which is automatically initialized when a user runs kinit at the + * command line. This is the predominant method for authenticating users in + * interactive applications: the user is expected to have run kinit + * recently, and the application will find the appropriate credentials in the + * ticket cache. + *

+ * In the case of the Kudu client, Kudu will automatically look for credentials + * in the standard system-configured ticket cache location. No additional code + * needs to be written to enable this behavior. + *

+ * Kudu will automatically detect if the ticket it has obtained from the ticket + * cache is about to expire. When that is the case, it will attempt to re-read + * the ticket cache to obtain a new ticket with a later expiration time. So, if + * an application needs to run for longer than the lifetime of a single ticket, + * the user must ensure that the ticket cache is periodically refreshed, for + * example by re-running 'kinit' once each day. + * + *

+ * <h1>Authenticating from a keytab</h1>
+ * + * Long-running applications typically obtain Kerberos credentials from a + * Kerberos keytab file. A keytab is essentially a saved password, and + * allows the application to obtain new Kerberos tickets whenever the prior + * ticket is about to expire. + *

+ * The Kudu client does not provide any utility code to facilitate logging in + * from a keytab. Instead, applications should invoke the JAAS APIs directly, + * and then ensure that the resulting {@link javax.security.auth.Subject} + * instance is associated with the current thread's + * {@link java.security.AccessControlContext} when instantiating the Kudu client + * instance for the first time. The {@link javax.security.auth.Subject} instance + * will be stored and used whenever Kerberos authentication is required. + *

+ * Note: if the Kudu client is instantiated with a + * {@link javax.security.auth.Subject} as described above, it will not + * make any attempt to re-login from the keytab. Instead, the application should + * arrange to periodically re-initiate the login process and update the + * credentials stored in the same Subject instance as was provided when the + * client was instantiated. + *

+ * The easiest way to authenticate using a keytab is by creating a JAAS config + * file such as this:

+ * ExampleLoginContextName {
+ *   com.sun.security.auth.module.Krb5LoginModule required
+ *   useKeyTab = true
+ *   keyTab = "/path/to/app.keytab"
+ *   principal = "appuser";
+ * };
+ * 
+ * This can then be passed to the application by adding {@code + * -Djava.security.auth.login.config=/path/to/jaas.conf} to the command when + * starting it. + * This authentication method needs to be set in the code as well by wrapping + * the code interacting with Kudu with a {@link + * javax.security.auth.Subject#doAs} after creating a login context using the + * JAAS config, logging in, and passing the {@link javax.security.auth.Subject} + * to the doAs: + *
+ * LoginContext login = new LoginContext("ExampleLoginContextName");
+ * login.login();
+ * KuduClient c = Subject.doAs(login.getSubject(),
+ *                             (PrivilegedAction<KuduClient>) () -> {
+ *   return myClientBuilder.build();
+ * });
+ * 
+ * In this case it's necessary to periodically re-login as needed and run doAs + * using the new subject. + *

+ * In the context of the Hadoop ecosystem, the {@code + * org.apache.hadoop.security.UserGroupInformation} class provides utility + * methods to login from a keytab and then run code as the resulting {@link + * javax.security.auth.Subject}:

+ *   UserGroupInformation.loginUserFromKeytab("appuser", "/path/to/app.keytab");
+ *   KuduClient c = UserGroupInformation.getLoginUser().doAs(
+ *     new PrivilegedExceptionAction<KuduClient>() {
+ *       @Override
+ *       public KuduClient run() throws Exception {
+ *         return myClientBuilder.build();
+ *       }
+ *     }
+ *   );
+ * 
The {@code UserGroupInformation} class will also automatically + * start a thread to periodically re-login from the keytab. It's not necessary + * to pass a JAAS config. + * + *

+ * <h1>Debugging Kudu's usage of Kerberos credentials</h1>
+ * + * The Kudu client emits DEBUG-level logs under the + * {@code org.apache.kudu.client.SecurityContext} slf4j category. Enabling DEBUG + * logging for this class may help you understand which credentials are being + * obtained by the Kudu client when it is instantiated. Additionally, if the + * Java system property {@code kudu.jaas.debug} is set to {@code true}, Kudu + * will enable the {@code debug} option when configuring {@code Krb5LoginModule} + * when it attempts to log in from a ticket cache. JDK-specific system properties + * such as {@code sun.security.krb5.debug} may also be useful in troubleshooting + * Kerberos authentication failures. + * + *
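As a sketch, and assuming the properties are set before the first client is constructed, the same switches can be flipped programmatically instead of on the command line:

[source,java]
----
// Enable extra Kerberos diagnostics before the first client is built.
System.setProperty("kudu.jaas.debug", "true");          // Krb5LoginModule debug output
System.setProperty("sun.security.krb5.debug", "true");  // JDK-specific, may vary by JVM

// Also raise org.apache.kudu.client.SecurityContext to DEBUG in the slf4j
// backend's configuration to see which credentials are picked up.
KuduClient client = new KuduClient.KuduClientBuilder("master1:7051").build();
----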

+ * <h1>Authenticating using tokens</h1>
+ * + * In the case of distributed applications, the worker tasks often do not have + * access to Kerberos credentials such as ticket caches or keytabs. + * Additionally, there may be hundreds or thousands of workers with relatively + * short life-times, and if each task attempted to authenticate using Kerberos, + * the amount of load on the Kerberos infrastructure could be substantial enough + * to cause instability. To solve this issue, Kudu provides support for + * authentication tokens. + *

+ * An authentication token is a time-limited credential which can be obtained by + * an application which has already authenticated via Kerberos. The token is + * represented by an opaque byte string, and it can be passed from one client to + * another to transfer credentials. + *

+ * A token may be generated using the + * {@link AsyncKuduClient#exportAuthenticationCredentials()} API, and then + * imported to another client using + * {@link AsyncKuduClient#importAuthenticationCredentials(byte[])}. + * + *
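A hedged sketch of handing credentials from an authenticated process to one without Kerberos access, using the synchronous wrapper; the process roles and addresses are illustrative and exception handling is omitted:

[source,java]
----
// On a process that has already authenticated (for example via Kerberos):
KuduClient driverClient = new KuduClient.KuduClientBuilder("master1:7051").build();
byte[] authnCreds = driverClient.exportAuthenticationCredentials();

// ...ship authnCreds to the worker process, then on the worker:
KuduClient workerClient = new KuduClient.KuduClientBuilder("master1:7051").build();
workerClient.importAuthenticationCredentials(authnCreds);
----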

+ * <h1>Authentication in Spark jobs</h1>
+ * + * Note that the Spark integration provided by the kudu-spark package + * automatically handles the interaction with Kerberos and the passing of tokens + * from the Spark driver to tasks. Refer to the Kudu documentation for details + * on how to submit a Spark job on a secure cluster. + * + *

+ * <h1>API Compatibility</h1>
+ * + * Note that some methods in the Kudu client implementation are public but + * annotated with the InterfaceAudience.Private annotation. This + * annotation indicates that, despite having {@code public} visibility, the + * method is not part of the public API and there is no guarantee that its + * existence or behavior will be maintained in subsequent versions of the Kudu + * client library. + * + * Other APIs are annotated with the InterfaceStability.Unstable annotation. + * These APIs are meant for public consumption but may change between minor releases. + * Note that the asynchronous client is currently considered unstable. + * + *

+ * <h1>Thread Safety</h1>
+ * + * The Kudu client instance itself is thread-safe; however, not all associated + * classes are themselves thread-safe. For example, neither + * {@link AsyncKuduSession} nor its synchronous wrapper {@link KuduSession} is + * thread-safe. Refer to the documentation for each individual class for more + * details. + * + *

+ * <h1>Asynchronous usage</h1>
+ * + * This client is fully non-blocking, any blocking operation will return a + * {@link Deferred} instance to which you can attach a {@link Callback} chain + * that will execute when the asynchronous operation completes. + *

+ * The asynchronous calls themselves typically do not throw exceptions. Instead, + * an {@code errback} should be attached which will be called with the Exception + * that occurred. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class AsyncKuduClient implements AutoCloseable { + + public static final Logger LOG = LoggerFactory.getLogger(AsyncKuduClient.class); + public static final int SLEEP_TIME = 500; + public static final byte[] EMPTY_ARRAY = new byte[0]; + public static final long NO_TIMESTAMP = -1; + public static final long INVALID_TXN_ID = -1; + public static final long DEFAULT_OPERATION_TIMEOUT_MS = 30000; + public static final int NO_SOFT_DELETED_STATE_RESERVED_SECONDS = 0; + public static final long DEFAULT_KEEP_ALIVE_PERIOD_MS = 15000; // 25% of the default scanner ttl. + public static final long DEFAULT_NEGOTIATION_TIMEOUT_MS = 10000; + private static final long MAX_RPC_ATTEMPTS = 100; + + /** + * The number of tablets to fetch from the master in a round trip when performing + * a lookup of a single partition (e.g. for a write), or re-looking-up a tablet with + * stale information. + */ + private static final int FETCH_TABLETS_PER_POINT_LOOKUP = 10; + + /** + * The number of tablets to fetch from the master when looking up a range of + * tablets. + */ + static int FETCH_TABLETS_PER_RANGE_LOOKUP = 1000; + + private final Bootstrap bootstrap; + + /** + * This map contains data cached from calls to the master's + * GetTableLocations RPC. This map is keyed by table ID. + */ + private final ConcurrentHashMap tableLocations = + new ConcurrentHashMap<>(); + + /** A cache to keep track of already opened connections to Kudu servers. */ + private final ConnectionCache connectionCache; + + @GuardedBy("sessions") + private final Set sessions = new HashSet<>(); + + /** The Hive Metastore configuration of the most recently connected-to master. */ + @GuardedBy("this") + private HiveMetastoreConfig hiveMetastoreConfig = null; + + // Since RPCs to the masters also go through RpcProxy, we need to treat them as if they were a + // normal table. We'll use the following fake table name to identify places where we need special + // handling. + // TODO(aserbin) clean this up + static final String MASTER_TABLE_NAME_PLACEHOLDER = "Kudu Master"; + private final KuduTable masterTable; + private final List masterAddresses; + + private final HashedWheelTimer timer; + + private final String clientId; + + /** + * Timestamp required for HybridTime external consistency through timestamp + * propagation. + * @see "src/kudu/common/common.proto" + */ + private long lastPropagatedTimestamp = NO_TIMESTAMP; + + /** + * Set to true once we have connected to a master at least once. + * + * This determines whether exportAuthenticationCredentials() needs to + * proactively connect to the cluster to obtain a token. + */ + private volatile boolean hasConnectedToMaster = false; + + /** + * The location of this client as assigned by the leader master. + * + * If no location is assigned, will be an empty string. + */ + private String location = ""; + + /** + * The ID of the cluster that this client is connected to. + * + * It will be an empty string if the client is not connected + * or the client is connected to a cluster that doesn't support + * cluster IDs + */ + private String clusterId = ""; + + /** + * Semaphore used to rate-limit master lookups + * Once we have more than this number of concurrent master lookups, we'll + * start to throttle ourselves slightly. 
+ * @see #acquireMasterLookupPermit + */ + private final Semaphore masterLookups = new Semaphore(50); + + private final Random sleepRandomizer = new Random(); + + private final long defaultOperationTimeoutMs; + + private final long defaultAdminOperationTimeoutMs; + + private final Statistics statistics; + + private final boolean statisticsDisabled; + + private final RequestTracker requestTracker; + + @InterfaceAudience.LimitedPrivate("Test") + final SecurityContext securityContext; + + /** A helper to facilitate re-acquiring of authentication token if current one expires. */ + private final AuthnTokenReacquirer tokenReacquirer; + + /** A helper to facilitate retrieving authz tokens */ + private final AuthzTokenCache authzTokenCache; + + private volatile boolean closed; + + private AsyncKuduClient(AsyncKuduClientBuilder b) { + this.bootstrap = b.createBootstrap(); + this.masterAddresses = b.masterAddresses; + this.masterTable = new KuduTable(this, MASTER_TABLE_NAME_PLACEHOLDER, + MASTER_TABLE_NAME_PLACEHOLDER, null, null, 1, null, null, null); + this.defaultOperationTimeoutMs = b.defaultOperationTimeoutMs; + this.defaultAdminOperationTimeoutMs = b.defaultAdminOperationTimeoutMs; + this.statisticsDisabled = b.statisticsDisabled; + this.statistics = statisticsDisabled ? null : new Statistics(); + this.timer = b.timer; + this.clientId = UUID.randomUUID().toString().replace("-", ""); + this.requestTracker = new RequestTracker(clientId); + + this.securityContext = new SecurityContext(); + this.connectionCache = new ConnectionCache(securityContext, bootstrap, b.saslProtocolName, + b.requireAuthentication, !b.encryptionPolicy.equals(EncryptionPolicy.OPTIONAL), + b.encryptionPolicy.equals(EncryptionPolicy.REQUIRED), b.defaultNegotiationTimeoutMs); + this.tokenReacquirer = new AuthnTokenReacquirer(this); + this.authzTokenCache = new AuthzTokenCache(this); + } + + /** + * Get a proxy to send RPC calls to the specified server. The result proxy object does not + * restrict the type of credentials that may be used to connect to the server: it will use the + * secondary credentials if available, otherwise SASL credentials are used to authenticate + * the client when negotiating the connection to the server. + * + * @param serverInfo server's information + * @return the proxy object bound to the target server + */ + @Nonnull + RpcProxy newRpcProxy(final ServerInfo serverInfo) { + return newRpcProxy(serverInfo, Connection.CredentialsPolicy.ANY_CREDENTIALS); + } + + /** + * Get a proxy to send RPC calls to the specified server. The result proxy object should use + * a connection to the server negotiated with the specified credentials policy. + * + * @param serverInfo target server information + * @param credentialsPolicy authentication credentials policy to use for the connection + * negotiation + * @return the proxy object bound to the target server + */ + @Nonnull + private RpcProxy newRpcProxy(final ServerInfo serverInfo, + Connection.CredentialsPolicy credentialsPolicy) { + final Connection connection = connectionCache.getConnection(serverInfo, credentialsPolicy); + return new RpcProxy(this, connection); + } + + /** + * Get a proxy to send RPC calls to Kudu master at the specified end-point. 
+ * + * @param hostPort master end-point + * @param inetAddress master ip-address + * @param credentialsPolicy credentials policy to use for the connection negotiation to the target + * master server + * @return the proxy object bound to the target master + */ + @Nonnull + RpcProxy newMasterRpcProxy(HostAndPort hostPort, + InetAddress inetAddress, + Connection.CredentialsPolicy credentialsPolicy) { + // We should have a UUID to construct ServerInfo for the master, but we have a chicken + // and egg problem, we first need to communicate with the masters to find out about them, + // and that's what we're trying to do. The UUID is just used for logging and cache key, + // so instead we just use concatenation of master host and port, prefixed with "master-". + return newRpcProxy( + new ServerInfo(getFakeMasterUuid(hostPort), + hostPort, + inetAddress, + /* location= */""), + credentialsPolicy); + } + + static String getFakeMasterUuid(HostAndPort hostPort) { + return "master-" + hostPort.toString(); + } + + void reconnectToCluster(Callback cb, + Callback eb) { + + final class ReconnectToClusterCB implements Callback { + private final Callback cb; + + ReconnectToClusterCB(Callback cb) { + this.cb = Preconditions.checkNotNull(cb); + } + + /** + * Report on the token re-acquisition results. The result authn token might be null: in that + * case the SASL credentials will be used to negotiate future connections. + */ + @Override + public Void call(ConnectToClusterResponse resp) throws Exception { + final Master.ConnectToMasterResponsePB masterResponsePB = resp.getConnectResponse(); + if (masterResponsePB.hasAuthnToken()) { + LOG.info("connect to master: received a new authn token"); + securityContext.setAuthenticationToken(masterResponsePB.getAuthnToken()); + cb.call(true); + } else { + LOG.warn("connect to master: received no authn token"); + securityContext.setAuthenticationToken(null); + cb.call(false); + } + synchronized (AsyncKuduClient.this) { + location = masterResponsePB.getClientLocation(); + clusterId = masterResponsePB.getClusterId(); + } + return null; + } + } + + ConnectToCluster.run(masterTable, masterAddresses, null, defaultAdminOperationTimeoutMs, + Connection.CredentialsPolicy.PRIMARY_CREDENTIALS).addCallbacks( + new ReconnectToClusterCB(cb), eb); + } + + /** + * Updates the last timestamp received from a server. Used for CLIENT_PROPAGATED + * external consistency. + * + * @param lastPropagatedTimestamp the last timestamp received from a server + */ + public synchronized void updateLastPropagatedTimestamp(long lastPropagatedTimestamp) { + if (this.lastPropagatedTimestamp == NO_TIMESTAMP || + this.lastPropagatedTimestamp < lastPropagatedTimestamp) { + this.lastPropagatedTimestamp = lastPropagatedTimestamp; + } + } + + /** + * Returns the last timestamp received from a server. Used for CLIENT_PROPAGATED + * external consistency. Note that the returned timestamp is encoded and cannot be + * interpreted as a raw timestamp. + * + * @return a long indicating the specially-encoded last timestamp received from a server + */ + public synchronized long getLastPropagatedTimestamp() { + return lastPropagatedTimestamp; + } + + /** + * Checks if the client received any timestamps from a server. Used for + * CLIENT_PROPAGATED external consistency. + * + * @return true if last propagated timestamp has been set + */ + public synchronized boolean hasLastPropagatedTimestamp() { + return lastPropagatedTimestamp != NO_TIMESTAMP; + } + + /** + * Returns a string representation of this client's location. 
If this + * client was not assigned a location, returns the empty string. + * + * @return a string representation of this client's location + */ + public synchronized String getLocationString() { + return location; + } + + /** + * Returns the ID of the cluster that this client is connected to. + * It will be an empty string if the client is not connected or + * the client is connected to a cluster that doesn't support + * cluster IDs. + * + * @return the ID of the cluster that this client is connected to + */ + public synchronized String getClusterId() { + return clusterId; + } + + /** + * Returns the {@link Timer} instance held by this client. This timer should + * be used everywhere for scheduling tasks after a delay, e.g., for + * timeouts. + * @return the time instance held by this client + */ + Timer getTimer() { + return timer; + } + + /** + * Returns the unique client id assigned to this client. + * @return the unique client id assigned to this client. + */ + String getClientId() { + return clientId; + } + + /** + * Returns a synchronous {@link KuduClient} which wraps this asynchronous client. + * Calling {@link KuduClient#close} on the returned client will close this client. + * If this asynchronous client should outlive the returned synchronous client, + * then do not close the synchronous client. + * @return a new synchronous {@code KuduClient} + */ + public KuduClient syncClient() { + return new KuduClient(this); + } + + /** + * Create a table on the cluster with the specified name, schema, and table configurations. + * If the primary key columns of the table schema aren't specified first, the deferred result + * will be a {@link NonRecoverableException} + * + * @param name the table's name + * @param schema the table's schema + * @param builder a builder containing the table's configurations + * @return a deferred object to track the progress of the createTable command that gives + * an object to communicate with the created table + */ + public Deferred createTable(final String name, Schema schema, + CreateTableOptions builder) { + checkIsClosed(); + if (builder == null) { + throw new IllegalArgumentException("CreateTableOptions may not be null"); + } + final Common.PartitionSchemaPB ps = builder.getBuilder().getPartitionSchema(); + if (!ps.hasRangeSchema() && ps.getHashSchemaCount() == 0) { + throw new IllegalArgumentException("Table partitioning must be specified using " + + "setRangePartitionColumns or addHashPartitions"); + + } + + // Send the CreateTable RPC. + final CreateTableRequest create = new CreateTableRequest(this.masterTable, + name, + schema, + builder, + timer, + defaultAdminOperationTimeoutMs); + Deferred createTableD = sendRpcToTablet(create); + + // Add a callback that converts the response into a KuduTable. + Deferred kuduTableD = createTableD.addCallbackDeferring( + resp -> getTableSchema(name, resp.getTableId(), create)); + + if (!builder.shouldWait()) { + return kuduTableD; + } + + // If requested, add a callback that waits until all of the table's tablets + // have been created. + return kuduTableD.addCallbackDeferring(tableResp -> { + TableIdentifierPB.Builder table = TableIdentifierPB.newBuilder() + .setTableId(ByteString.copyFromUtf8(tableResp.getTableId())); + return getDelayedIsCreateTableDoneDeferred(table, create, tableResp); + }); + } + + /** + * Check whether a previously issued createTable() is done. 
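A sketch of the asynchronous createTable() call, assuming an AsyncKuduClient named asyncClient and hypothetical table and column names; the blocking join() is used only to keep the example short, and exceptions are not handled:

[source,java]
----
List<ColumnSchema> columns = new ArrayList<>();
columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT64).key(true).build());
columns.add(new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).nullable(true).build());
Schema schema = new Schema(columns);

CreateTableOptions options = new CreateTableOptions()
    .setRangePartitionColumns(ImmutableList.of("key"))   // partitioning must be specified
    .setNumReplicas(3);

KuduTable table = asyncClient.createTable("metrics", schema, options).join();
----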
+ * @param name table's name + * @return a deferred object to track the progress of the isCreateTableDone command + */ + public Deferred isCreateTableDone(String name) { + return doIsCreateTableDone(TableIdentifierPB.newBuilder().setTableName(name), null); + } + + /** + * Check whether a previously issued createTable() is done. + * @param table table identifier + * @param parent parent RPC (for tracing), if any + * @return a deferred object to track the progress of the isCreateTableDone command + */ + private Deferred doIsCreateTableDone( + @Nonnull TableIdentifierPB.Builder table, + @Nullable KuduRpc parent) { + checkIsClosed(); + IsCreateTableDoneRequest request = new IsCreateTableDoneRequest(this.masterTable, + table, + timer, + defaultAdminOperationTimeoutMs); + if (parent != null) { + request.setParentRpc(parent); + } + return sendRpcToTablet(request); + } + + /** + * Delete a table with the specified name. + * @param name the table's name + * @param reserveSeconds the soft deleted table to be alive time + * @return a deferred object to track the progress of the deleteTable command + */ + public Deferred deleteTable(String name, + int reserveSeconds) { + checkIsClosed(); + DeleteTableRequest delete = new DeleteTableRequest(this.masterTable, + name, + timer, + defaultAdminOperationTimeoutMs, + reserveSeconds); + return sendRpcToTablet(delete); + } + + /** + * Delete a table with the specified name. + * The behavior of DeleteRPC is controlled by the + * '--default_deleted_table_reserve_seconds' flag on master. + * @param name the table's name + * @return a deferred object to track the progress of the deleteTable command + */ + public Deferred deleteTable(String name) { + checkIsClosed(); + DeleteTableRequest delete = new DeleteTableRequest(this.masterTable, + name, + timer, + defaultAdminOperationTimeoutMs); + return sendRpcToTablet(delete); + } + + /** + * Recall a soft-deleted table on the cluster with the specified id + * @param id the table's id + * @return a deferred object to track the progress of the recall command + */ + public Deferred recallDeletedTable(String id) { + return recallDeletedTable(id, ""); + } + + /** + * Recall a soft-deleted table on the cluster with the specified id + * @param id the table's id + * @param newTableName the table's new name after recall + * @return a deferred object to track the progress of the recall command + */ + public Deferred recallDeletedTable(String id, + String newTableName) { + checkIsClosed(); + RecallDeletedTableRequest recall = new RecallDeletedTableRequest( + this.masterTable, + id, + newTableName, + timer, + defaultAdminOperationTimeoutMs); + return sendRpcToTablet(recall); + } + + /** + * Alter a table on the cluster as specified by the builder. + * + * @param name the table's name (old name if the table is being renamed) + * @param ato the alter table options + * @return a deferred object to track the progress of the alter command + */ + public Deferred alterTable(String name, AlterTableOptions ato) { + checkIsClosed(); + final AlterTableRequest alter = new AlterTableRequest(this.masterTable, + name, + ato, + timer, + defaultAdminOperationTimeoutMs); + Deferred responseD = sendRpcToTablet(alter); + + if (ato.hasAddDropRangePartitions()) { + // Clear the table locations cache so the new partition is immediately visible. 
+ responseD = responseD.addCallback(new Callback() { + @Override + public AlterTableResponse call(AlterTableResponse resp) { + tableLocations.remove(resp.getTableId()); + return resp; + } + + @Override + public String toString() { + return "ClearTableLocationsCacheCB"; + } + }).addErrback(new Callback() { + @Override + public Exception call(Exception e) { + // We clear the cache even on failure, just in + // case the alter table operation actually succeeded. + tableLocations.clear(); + return e; + } + + @Override + public String toString() { + return "ClearTableLocationsCacheEB"; + } + }); + } + if (!ato.shouldWait()) { + return responseD; + } + + // If requested, add a callback that waits until all of the table's tablets + // have been altered. + return responseD.addCallbackDeferring(resp -> { + TableIdentifierPB.Builder table = TableIdentifierPB.newBuilder() + .setTableId(ByteString.copyFromUtf8(resp.getTableId())); + return getDelayedIsAlterTableDoneDeferred(table, alter, resp); + }); + } + + /** + * Check whether a previously issued alterTable() is done. + * @param name table name + * @return a deferred object to track the progress of the isAlterTableDone command + */ + public Deferred isAlterTableDone(String name) { + return doIsAlterTableDone(TableIdentifierPB.newBuilder().setTableName(name), null); + } + + /** + * Check whether a previously issued alterTable() is done. + * @param table table identifier + * @param parent parent RPC (for tracing), if any + * @return a deferred object to track the progress of the isAlterTableDone command + */ + private Deferred doIsAlterTableDone( + @Nonnull TableIdentifierPB.Builder table, + @Nullable KuduRpc parent) { + checkIsClosed(); + IsAlterTableDoneRequest request = new IsAlterTableDoneRequest(this.masterTable, + table, + timer, + defaultAdminOperationTimeoutMs); + request.setParentRpc(parent); + return sendRpcToTablet(request); + } + + /** + * Get the list of running tablet servers. + * @return a deferred object that yields a list of tablet servers + */ + public Deferred listTabletServers() { + checkIsClosed(); + ListTabletServersRequest rpc = new ListTabletServersRequest(this.masterTable, + timer, + defaultAdminOperationTimeoutMs); + return sendRpcToTablet(rpc); + } + + /** + * Gets a table's schema by ID or by name. If both are provided, table id is preferred. + * + * @param tableName name of table + * @param tableId immutable ID of table + * @param parent parent RPC (for tracing), if any + * @return a deferred object that yields the schema + */ + private Deferred getTableSchema( + @Nullable final String tableName, + @Nullable String tableId, + @Nullable KuduRpc parent) { + Preconditions.checkArgument(tableId != null || tableName != null); + + // Prefer a lookup by table ID over name, since the former is immutable. + // For backwards compatibility with older tservers, we don't require authz + // token support. + GetTableSchemaRequest rpc = new GetTableSchemaRequest(this.masterTable, + tableId, + tableId != null ? null : tableName, + timer, + defaultAdminOperationTimeoutMs, + /*requiresAuthzTokenSupport=*/false); + rpc.setParentRpc(parent); + return sendRpcToTablet(rpc).addCallback(resp -> { + // When opening a table, clear the existing cached non-covered range entries. + // This avoids surprises where a new table instance won't be able to see the + // current range partitions of a table for up to the TTL. 
+ TableLocationsCache cache = tableLocations.get(resp.getTableId()); + if (cache != null) { + cache.clearNonCoveredRangeEntries(); + } + SignedTokenPB authzToken = resp.getAuthzToken(); + if (authzToken != null) { + authzTokenCache.put(resp.getTableId(), authzToken); + } + + LOG.debug("Opened table {}", resp.getTableId()); + return new KuduTable(AsyncKuduClient.this, + resp.getTableName(), + resp.getTableId(), + resp.getSchema(), + resp.getPartitionSchema(), + resp.getNumReplicas(), + resp.getExtraConfig(), + resp.getOwner(), + resp.getComment()); + }); + } + + /** + * Get the list of all the regular (i.e. not soft-deleted) tables. + * @return a deferred object that yields a list of all the tables + */ + public Deferred getTablesList() { + return getTablesList(null, false); + } + + /** + * Get a list of regular table names. Passing a null filter returns all the tables. When a + * filter is specified, it only returns tables that satisfy a substring match. + * @param nameFilter an optional table name filter + * @return a deferred that yields the list of table names + */ + public Deferred getTablesList(String nameFilter) { + ListTablesRequest rpc = new ListTablesRequest(this.masterTable, + nameFilter, + false, + timer, + defaultAdminOperationTimeoutMs); + return sendRpcToTablet(rpc); + } + + /** + * Get a list of table names. Passing a null filter returns all the tables. When a filter is + * specified, it only returns tables that satisfy a substring match. + * @param nameFilter an optional table name filter + * @param showSoftDeleted whether to display only regular (i.e. not soft deleted) + * tables or all tables (i.e. soft deleted tables and regular tables) + * @return a deferred that yields the list of table names + */ + public Deferred getTablesList(String nameFilter, boolean showSoftDeleted) { + ListTablesRequest rpc = new ListTablesRequest(this.masterTable, + nameFilter, + showSoftDeleted, + timer, + defaultAdminOperationTimeoutMs); + return sendRpcToTablet(rpc); + } + + /** + * Get the list of all the soft deleted tables. + * @return a deferred object that yields a list of all the soft deleted tables + */ + public Deferred getSoftDeletedTablesList() { + return getTablesList(null, true); + } + + /** + * Get table's statistics from master. + * @param name the table's name + * @return an deferred KuduTableStatistics + */ + public Deferred getTableStatistics(String name) { + GetTableStatisticsRequest rpc = new GetTableStatisticsRequest(this.masterTable, + name, + timer, + defaultAdminOperationTimeoutMs); + + return sendRpcToTablet(rpc).addCallback(resp -> + new KuduTableStatistics(resp.getOnDiskSize(), resp.getLiveRowCount()) + ); + } + + /** + * Test if a table exists. + * @param name a non-null table name + * @return true if the table exists, else false + */ + public Deferred tableExists(final String name) { + if (name == null) { + throw new IllegalArgumentException("The table name cannot be null"); + } + + return AsyncUtil.addCallbacksDeferring( + getTableSchema(name, null, null), + table -> Deferred.fromResult(true), + (Callback, Exception>) e -> { + if (e instanceof NonRecoverableException) { + Status status = ((NonRecoverableException) e).getStatus(); + if (status.isNotFound()) { + return Deferred.fromResult(false); + } + } + return Deferred.fromError(e); + }); + } + + /** + * Open the table with the given id. 
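A small sketch of discovering tables through the synchronous wrapper; the table name is hypothetical and exception handling is omitted:

[source,java]
----
if (client.tableExists("metrics")) {
  KuduTable table = client.openTable("metrics");
  System.out.println("Columns: " + table.getSchema().getColumnCount());
} else {
  // Fall back to listing everything visible to this client.
  for (String name : client.getTablesList().getTablesList()) {
    System.out.println(name);
  }
}
----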
+ * + * @param id the id of the table to open + * @return a deferred KuduTable + */ + Deferred openTableById(String id) { + checkIsClosed(); + return getTableSchema(null, id, null); + } + + /** + * Open the table with the given name. + * + * New range partitions created by other clients will immediately be available + * after opening the table. + * + * @param name table to open + * @return a deferred KuduTable + */ + public Deferred openTable(String name) { + checkIsClosed(); + return getTableSchema(name, null, null); + } + + /** + * Export serialized authentication data that may be passed to a different + * client instance and imported to provide that client the ability to connect + * to the cluster. + */ + @InterfaceStability.Unstable + public Deferred exportAuthenticationCredentials() { + // This is basically just a hacky way to encapsulate the necessary bits to + // properly do exponential backoff on retry; there's no actual "RPC" to send. + KuduRpc fakeRpc = buildFakeRpc("exportAuthenticationCredentials", null); + + // Store the Deferred locally; callback() or errback() on the RPC will + // reset it and we'd return a different, non-triggered Deferred. + Deferred fakeRpcD = fakeRpc.getDeferred(); + doExportAuthenticationCredentials(fakeRpc); + return fakeRpcD; + } + + private void doExportAuthenticationCredentials( + final KuduRpc fakeRpc) { + // If we've already connected to the master, use the authentication + // credentials that we received when we connected. + if (hasConnectedToMaster) { + fakeRpc.callback(securityContext.exportAuthenticationCredentials()); + return; + } + + // We have no authn data -- connect to the master, which will fetch + // new info. + fakeRpc.attempt++; + getMasterTableLocationsPB(null) + .addCallback(new MasterLookupCB(masterTable, + /* partitionKey */ null, + /* requestedBatchSize */ 1)) + .addCallback(ignored -> { + // Just call ourselves again; we're guaranteed to have the + // authentication credentials. + assert hasConnectedToMaster; + doExportAuthenticationCredentials(fakeRpc); + return null; + }) + .addErrback(new RetryTaskErrback<>( + fakeRpc, ignored -> doExportAuthenticationCredentials(fakeRpc))); + } + + @InterfaceAudience.LimitedPrivate("Test") + public AuthzTokenCache getAuthzTokenCache() { + return this.authzTokenCache; + } + + /** + * Get the Hive Metastore configuration of the most recently connected-to leader master, or + * {@code null} if the Hive Metastore integration is not enabled. + */ + @InterfaceAudience.LimitedPrivate("Impala") + @InterfaceStability.Unstable + public Deferred getHiveMetastoreConfig() { + // This is basically just a hacky way to encapsulate the necessary bits to + // properly do exponential backoff on retry; there's no actual "RPC" to send. + KuduRpc fakeRpc = buildFakeRpc("getHiveMetastoreConfig", null); + + // Store the Deferred locally; callback() or errback() on the RPC will + // reset it and we'd return a different, non-triggered Deferred. + Deferred fakeRpcD = fakeRpc.getDeferred(); + doGetHiveMetastoreConfig(fakeRpc); + return fakeRpcD; + } + + private void doGetHiveMetastoreConfig(final KuduRpc fakeRpc) { + // If we've already connected to the master, use the config we received when we connected. + if (hasConnectedToMaster) { + // Take a ref to the HMS config under the lock, but invoke the callback + // chain with the lock released. 
+ HiveMetastoreConfig c; + synchronized (this) { + c = hiveMetastoreConfig; + } + fakeRpc.callback(c); + return; + } + + // We have no Metastore config -- connect to the master, which will fetch new info. + fakeRpc.attempt++; + getMasterTableLocationsPB(null) + .addCallback(new MasterLookupCB(masterTable, + /* partitionKey */ null, + /* requestedBatchSize */ 1)) + .addCallback(ignored -> { + // Just call ourselves again; we're guaranteed to have the HMS config. + assert hasConnectedToMaster; + doGetHiveMetastoreConfig(fakeRpc); + return null; + }) + .addErrback(new RetryTaskErrback<>( + fakeRpc, ignored -> doGetHiveMetastoreConfig(fakeRpc))); + } + + /** + * Errback for retrying a generic TimerTask. Retries RecoverableExceptions; + * signals fakeRpc's Deferred on a fatal error. + */ + class RetryTaskErrback implements Callback { + private final KuduRpc fakeRpc; + private final TimerTask retryTask; + + public RetryTaskErrback(KuduRpc fakeRpc, + TimerTask retryTask) { + this.fakeRpc = fakeRpc; + this.retryTask = retryTask; + } + + @Override + public Void call(Exception arg) { + if (!(arg instanceof RecoverableException)) { + fakeRpc.errback(arg); + return null; + } + + // Sleep and retry the entire operation. + RecoverableException ex = (RecoverableException)arg; + long sleepTime = getSleepTimeForRpcMillis(fakeRpc); + if (cannotRetryRequest(fakeRpc) || + fakeRpc.timeoutTracker.wouldSleepingTimeoutMillis(sleepTime)) { + tooManyAttemptsOrTimeout(fakeRpc, ex); // Invokes fakeRpc.Deferred. + return null; + } + fakeRpc.addTrace( + new RpcTraceFrame.RpcTraceFrameBuilder( + fakeRpc.method(), + RpcTraceFrame.Action.SLEEP_THEN_RETRY) + .callStatus(ex.getStatus()) + .build()); + newTimeout(timer, retryTask, sleepTime); + return null; + + // fakeRpc.Deferred was not invoked: the user continues to wait until + // retryTask succeeds or fails with a fatal error. + } + + @Override + public String toString() { + return "retry task after error"; + } + } + + /** + * Import data allowing this client to authenticate to the cluster. + * This will typically be used before making any connections to servers + * in the cluster. + * + * Note that, if this client has already been used by one user, this + * method cannot be used to switch authenticated users. Attempts to + * do so have undefined results, and may throw an exception. + * + * @param authnData then authentication data provided by a prior call to + * {@link #exportAuthenticationCredentials()} + */ + @InterfaceStability.Unstable + public void importAuthenticationCredentials(byte[] authnData) { + securityContext.importAuthenticationCredentials(authnData); + } + + /** + * Mark the given CA certificates (in DER format) as the trusted ones for the + * client. The provided list of certificates replaces any previously set ones. + * + * @param certificates list of certificates to trust (in DER format) + * @throws CertificateException if any of the specified certificates were invalid + */ + @InterfaceStability.Unstable + public void trustedCertificates(List certificates) throws CertificateException { + securityContext.trustCertificates(certificates); + } + + /** + * Set JWT (JSON Web Token) to authenticate the client to a server. + *
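+   * <p>
+   * Illustrative usage only; the variable below is a placeholder for a token obtained
+   * from whatever identity provider the cluster trusts:
+   * <pre>{@code
+   *   asyncClient.jwt(jwtFromIdentityProvider);
+   * }</pre>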

+ * @note If {@link #importAuthenticationCredentials(byte[] authnData)} and + * this method are called on the same object, the JWT provided with this call + * overrides the corresponding JWT that comes as a part of the imported + * authentication credentials (if present). + * + * @param jwt The JSON web token to set. + */ + @InterfaceStability.Unstable + public void jwt(String jwt) { + AuthenticationCredentialsPB credentials = + AuthenticationCredentialsPB.newBuilder() + .setJwt(Token.JwtRawPB.newBuilder() + .setJwtData(ByteString.copyFromUtf8(jwt)) + .build()) + .build(); + securityContext.importAuthenticationCredentials(credentials.toByteArray()); + } + + /** + * Get the timeout used for operations on sessions and scanners. + * @return a timeout in milliseconds + */ + public long getDefaultOperationTimeoutMs() { + return defaultOperationTimeoutMs; + } + + /** + * Get the timeout used for admin operations. + * @return a timeout in milliseconds + */ + public long getDefaultAdminOperationTimeoutMs() { + return defaultAdminOperationTimeoutMs; + } + + /** + * Socket read timeouts are no longer used in the Java client and have no effect. + * This method always returns 0, as that previously indicated no socket read timeout. + * @return a timeout in milliseconds + * @deprecated socket read timeouts are no longer used + */ + @Deprecated public long getDefaultSocketReadTimeoutMs() { + LOG.info("getDefaultSocketReadTimeoutMs is deprecated"); + return 0; + } + + /** + * @return the list of master addresses, stringified using commas to separate + * them + */ + public String getMasterAddressesAsString() { + return Joiner.on(",").join(masterAddresses); + } + + /** + * Check if statistics collection is enabled for this client. + * @return true if it is enabled, else false + */ + public boolean isStatisticsEnabled() { + return !statisticsDisabled; + } + + /** + * Get the statistics object of this client. + * + * @return this client's Statistics object + * @throws IllegalStateException thrown if statistics collection has been disabled + */ + public Statistics getStatistics() { + if (statisticsDisabled) { + throw new IllegalStateException("This client's statistics is disabled"); + } + return this.statistics; + } + + RequestTracker getRequestTracker() { + return requestTracker; + } + + @InterfaceAudience.LimitedPrivate("Test") + KuduTable getMasterTable() { + return masterTable; + } + + /** + * Creates a new {@link AsyncKuduScanner.AsyncKuduScannerBuilder} for a particular table. + * @param table the name of the table you intend to scan. + * The string is assumed to use the platform's default charset. + * @return a new scanner builder for this table + */ + public AsyncKuduScanner.AsyncKuduScannerBuilder newScannerBuilder(KuduTable table) { + checkIsClosed(); + return new AsyncKuduScanner.AsyncKuduScannerBuilder(this, table); + } + + /** + * Create a new session for interacting with the cluster. + * User is responsible for destroying the session object. + * This is a fully local operation (no RPCs or blocking). + * @return a new AsyncKuduSession + */ + public AsyncKuduSession newSession() { + checkIsClosed(); + AsyncKuduSession session = new AsyncKuduSession(this); + synchronized (sessions) { + sessions.add(session); + } + return session; + } + + /** + * This method is for KuduSessions so that they can remove themselves as part of closing down. 
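+   * It is expected to be invoked once per session, when that session finishes closing,
+   * so that {@link #shutdown()} does not attempt to flush it again.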
+ * @param session Session to remove + */ + void removeSession(AsyncKuduSession session) { + synchronized (sessions) { + boolean removed = sessions.remove(session); + assert removed; + } + } + + /** + * Package-private access point for {@link AsyncKuduScanner}s to scan more rows. + * @param scanner The scanner to use. + * @return A deferred row. + */ + Deferred scanNextRows(final AsyncKuduScanner scanner) { + RemoteTablet tablet = Preconditions.checkNotNull(scanner.currentTablet()); + KuduRpc nextRequest = scanner.getNextRowsRequest(); + // Important to increment the attempts before the next if statement since + // getSleepTimeForRpc() relies on it if the client is null or dead. + nextRequest.attempt++; + final ServerInfo info = tablet.getTabletServerByUuid(scanner.getTsUUID()); + if (info == null) { + return delayedSendRpcToTablet(nextRequest, new RecoverableException(Status.RemoteError( + String.format("No information on servers hosting tablet %s, will retry later", + tablet.getTabletId())))); + } + + Deferred d = nextRequest.getDeferred(); + RpcProxy.sendRpc(this, connectionCache.getConnection( + info, Connection.CredentialsPolicy.ANY_CREDENTIALS), nextRequest); + return d; + } + + /** + * Package-private access point for {@link AsyncKuduScanner}s to close themselves. + * @param scanner the scanner to close. + * @return a deferred object that indicates the completion of the request. + * The {@link AsyncKuduScanner.Response} can contain rows that were left to scan. + */ + Deferred closeScanner(final AsyncKuduScanner scanner) { + final RemoteTablet tablet = scanner.currentTablet(); + // Getting a null tablet here without being in a closed state means we were in between tablets. + if (tablet == null) { + return Deferred.fromResult(null); + } + final KuduRpc closeRequest = scanner.getCloseRequest(); + final ServerInfo info = tablet.getTabletServerByUuid(scanner.getTsUUID()); + if (info == null) { + return Deferred.fromResult(null); + } + + final Deferred d = closeRequest.getDeferred(); + closeRequest.attempt++; + RpcProxy.sendRpc(this, connectionCache.getConnection( + info, Connection.CredentialsPolicy.ANY_CREDENTIALS), closeRequest); + return d; + } + + /** + * Package-private access point for {@link AsyncKuduScanner}s to keep themselves + * alive on tablet servers. + * @param scanner the scanner to keep alive. + * @return a deferred object that indicates the completion of the request. + */ + Deferred keepAlive(final AsyncKuduScanner scanner) { + checkIsClosed(); + final RemoteTablet tablet = scanner.currentTablet(); + // Getting a null tablet here without being in a closed state means we were in between tablets. + // If there is no scanner to keep alive, we still return Status.OK(). + if (tablet == null) { + return Deferred.fromResult(null); + } + + final KuduRpc keepAliveRequest = scanner.getKeepAliveRequest(); + final ServerInfo info = tablet.getTabletServerByUuid(scanner.getTsUUID()); + if (info == null) { + return Deferred.fromResult(null); + } + + final Deferred d = keepAliveRequest.getDeferred(); + keepAliveRequest.attempt++; + RpcProxy.sendRpc(this, connectionCache.getConnection( + info, Connection.CredentialsPolicy.ANY_CREDENTIALS), keepAliveRequest); + return d; + } + + /** + * Sends the provided {@link KuduRpc} to the tablet server hosting the leader + * of the tablet identified by the RPC's table and partition key. + * + * Note: despite the name, this method is also used for routing master + * requests to the leader master instance since it's also handled like a tablet. 
+ * + * @param request the RPC to send + * @param the expected return type of the RPC + * @return a {@code Deferred} which will contain the response + */ + Deferred sendRpcToTablet(final KuduRpc request) { + if (cannotRetryRequest(request)) { + return tooManyAttemptsOrTimeout(request, null); + } + request.attempt++; + final String tableId = request.getTable().getTableId(); + byte[] partitionKey = request.partitionKey(); + TableLocationsCache.Entry entry = getTableLocationEntry(tableId, partitionKey); + + if (entry != null && entry.isNonCoveredRange()) { + Exception e = new NonCoveredRangeException(entry.getLowerBoundPartitionKey(), + entry.getUpperBoundPartitionKey()); + // Sending both as an errback and returning fromError because sendRpcToTablet might be + // called via a callback that won't care about the returned Deferred. + Deferred d = request.getDeferred(); + request.errback(e); + return d; + } + + // Set the propagated timestamp so that the next time we send a message to + // the server the message includes the last propagated timestamp. + long lastPropagatedTs = getLastPropagatedTimestamp(); + if (request.getExternalConsistencyMode() == CLIENT_PROPAGATED && + lastPropagatedTs != NO_TIMESTAMP) { + request.setPropagatedTimestamp(lastPropagatedTs); + } + + // If we found a tablet, we'll try to find the TS to talk to. + if (entry != null) { + RemoteTablet tablet = entry.getTablet(); + ServerInfo info = tablet.getReplicaSelectedServerInfo(request.getReplicaSelection(), + getLocationString()); + if (info != null) { + Deferred d = request.getDeferred(); + request.setTablet(tablet); + RpcProxy.sendRpc(this, connectionCache.getConnection( + info, Connection.CredentialsPolicy.ANY_CREDENTIALS), request); + return d; + } + } + + request.addTrace( + new RpcTraceFrame.RpcTraceFrameBuilder( + request.method(), + RpcTraceFrame.Action.QUERY_MASTER) + .build()); + + // We fall through to here in two cases: + // + // 1) This client has not yet discovered the tablet which is responsible for + // the RPC's table and partition key. This can happen when the client's + // tablet location cache is cold because the client is new, or the table + // is new. + // + // 2) The tablet is known, but we do not have an active client for the + // leader replica. + Callback, Master.GetTableLocationsResponsePB> cb = new RetryRpcCB<>(request); + Callback, Exception> eb = new RetryRpcErrback<>(request); + Deferred returnedD = + locateTablet(request.getTable(), partitionKey, FETCH_TABLETS_PER_POINT_LOOKUP, request); + return AsyncUtil.addCallbacksDeferring(returnedD, cb, eb); + } + + /** + * Callback used to retry a RPC after another query finished, like looking up where that RPC + * should go. + *

+ * Use {@code AsyncUtil.addCallbacksDeferring} to add this as the callback and + * {@link AsyncKuduClient.RetryRpcErrback} as the "errback" to the {@code Deferred} + * returned by {@link #locateTablet(KuduTable, byte[], int, KuduRpc)}. + * @param RPC's return type. + * @param Previous query's return type, which we don't use, but need to specify in order to + * tie it all together. + */ + final class RetryRpcCB implements Callback, D> { + private final KuduRpc request; + + RetryRpcCB(KuduRpc request) { + this.request = request; + } + + @Override + public Deferred call(final D arg) { + LOG.debug("Retrying sending RPC {} after lookup", request); + return sendRpcToTablet(request); // Retry the RPC. + } + + @Override + public String toString() { + return "retry RPC"; + } + } + + /** + * "Errback" used to delayed-retry a RPC if a recoverable exception is thrown in the callback + * chain. + * Other exceptions are used to notify request RPC error, and passed through to be handled + * by the caller. + *

+ * Use {@code AsyncUtil.addCallbacksDeferring} to add this as the "errback" and + * {@link RetryRpcCB} as the callback to the {@code Deferred} returned by + * {@link #locateTablet(KuduTable, byte[], int, KuduRpc)}. + * @see #delayedSendRpcToTablet(KuduRpc, KuduException) + * @param The type of the original RPC. + */ + final class RetryRpcErrback implements Callback, Exception> { + private final KuduRpc request; + + public RetryRpcErrback(KuduRpc request) { + this.request = request; + } + + @Override + public Deferred call(Exception arg) { + if (arg instanceof RecoverableException) { + return delayedSendRpcToTablet(request, (KuduException) arg); + } + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Notify RPC %s after lookup exception", request), arg); + } + request.errback(arg); + return Deferred.fromError(arg); + } + + @Override + public String toString() { + return "retry RPC after error"; + } + } + + /** + * Returns an errback ensuring that if the delayed call throws an Exception, + * it will be propagated back to the user. + *

+ * @param rpc RPC to errback if there's a problem with the delayed call + * @param RPC's return type + * @return newly created errback + */ + private Callback getDelayedIsTableDoneEB(final KuduRpc rpc) { + return e -> { + // TODO maybe we can retry it? + rpc.errback(e); + return e; + }; + } + + /** + * Creates an RPC that will never be sent, and will instead be used + * exclusively for timeouts. + * @param method fake RPC method (shows up in RPC traces) + * @param parent parent RPC (for tracing), if any + * @param the expected return type of the fake RPC + * @param timeoutMs the timeout in milliseconds for the fake RPC + * @return created fake RPC + */ + KuduRpc buildFakeRpc( + @Nonnull final String method, + @Nullable final KuduRpc parent, + long timeoutMs) { + KuduRpc rpc = new FakeKuduRpc<>(method, timer, timeoutMs); + rpc.setParentRpc(parent); + return rpc; + } + + /** + * Creates an RPC that will never be sent, and will instead be used + * exclusively for timeouts. + * @param method fake RPC method (shows up in RPC traces) + * @param parent parent RPC (for tracing), if any + * @param the expected return type of the fake RPC + * @return created fake RPC + */ + KuduRpc buildFakeRpc( + @Nonnull final String method, + @Nullable final KuduRpc parent) { + return buildFakeRpc(method, parent, defaultAdminOperationTimeoutMs); + } + + /** + * A fake RPC that is used for timeouts and will never be sent. + */ + static class FakeKuduRpc extends KuduRpc { + private final String method; + + FakeKuduRpc(String method, Timer timer, long timeoutMillis) { + super(null, timer, timeoutMillis); + this.method = method; + } + + @Override + Message createRequestPB() { + return null; + } + + @Override + String serviceName() { + return null; + } + + @Override + String method() { + return method; + } + + @Override + Pair deserialize(CallResponse callResponse, String tsUUID) throws KuduException { + return null; + } + } + + /** + * Schedules a IsAlterTableDone RPC. When the response comes in, if the table + * is done altering, the RPC's callback chain is triggered with 'resp' as its + * value. If not, another IsAlterTableDone RPC is scheduled and the cycle + * repeats, until the alter is finished or a timeout is reached. + * @param table table identifier + * @param parent parent RPC (for tracing), if any + * @param resp previous AlterTableResponse, if any + * @return Deferred that will become ready when the alter is done + */ + Deferred getDelayedIsAlterTableDoneDeferred( + @Nonnull TableIdentifierPB.Builder table, + @Nullable KuduRpc parent, + @Nullable AlterTableResponse resp) { + // TODO(adar): By scheduling even the first RPC via timer, the sequence of + // RPCs is delayed by at least one timer tick, which is unfortunate for the + // case where the table is already fully altered. + // + // Eliminating the delay by sending the first RPC immediately (and + // scheduling the rest via timer) would also allow us to replace this "fake" + // RPC with a real one. + KuduRpc fakeRpc = buildFakeRpc("IsAlterTableDone", parent); + + // Store the Deferred locally; callback() or errback() on the RPC will + // reset it and we'd return a different, non-triggered Deferred. + Deferred fakeRpcD = fakeRpc.getDeferred(); + + delayedIsAlterTableDone( + table, + fakeRpc, + getDelayedIsAlterTableDoneCB(fakeRpc, table, resp), + getDelayedIsTableDoneEB(fakeRpc)); + return fakeRpcD; + } + + /** + * Schedules a IsCreateTableDone RPC. 
When the response comes in, if the table
+   * is done creating, the RPC's callback chain is triggered with 'resp' as its
+   * value. If not, another IsCreateTableDone RPC is scheduled and the cycle
+   * repeats, until the create is finished or a timeout is reached.
+   * @param table table identifier
+   * @param parent parent RPC (for tracing), if any
+   * @param resp previous KuduTable, if any
+   * @return Deferred that will become ready when the create is done
+   */
+  Deferred<KuduTable> getDelayedIsCreateTableDoneDeferred(
+      @Nonnull TableIdentifierPB.Builder table,
+      @Nullable KuduRpc<?> parent,
+      @Nullable KuduTable resp) {
+    // TODO(adar): By scheduling even the first RPC via timer, the sequence of
+    // RPCs is delayed by at least one timer tick, which is unfortunate for the
+    // case where the table is already fully created.
+    //
+    // Eliminating the delay by sending the first RPC immediately (and
+    // scheduling the rest via timer) would also allow us to replace this "fake"
+    // RPC with a real one.
+    KuduRpc<KuduTable> fakeRpc = buildFakeRpc("IsCreateTableDone", parent);
+
+    // Store the Deferred locally; callback() or errback() on the RPC will
+    // reset it and we'd return a different, non-triggered Deferred.
+    Deferred<KuduTable> fakeRpcD = fakeRpc.getDeferred();
+
+    delayedIsCreateTableDone(
+        table,
+        fakeRpc,
+        getDelayedIsCreateTableDoneCB(fakeRpc, table, resp),
+        getDelayedIsTableDoneEB(fakeRpc));
+    return fakeRpcD;
+  }
+
+  /**
+   * Returns a callback to be called upon completion of an IsAlterTableDone RPC.
+   * If the table is fully altered, triggers the provided rpc's callback chain
+   * with 'alterResp' as its value. Otherwise, sends another IsAlterTableDone
+   * RPC after sleeping.
+   *

+ * @param rpc RPC that initiated this sequence of operations + * @param table table identifier + * @param alterResp response from an earlier AlterTable RPC, if any + * @return callback that will eventually return 'alterResp' + */ + private Callback, IsAlterTableDoneResponse> + getDelayedIsAlterTableDoneCB( + @Nonnull final KuduRpc rpc, + @Nonnull final TableIdentifierPB.Builder table, + @Nullable final AlterTableResponse alterResp) { + return resp -> { + // Store the Deferred locally; callback() below will reset it and we'd + // return a different, non-triggered Deferred. + Deferred d = rpc.getDeferred(); + if (resp.isDone()) { + rpc.callback(alterResp); + } else { + rpc.attempt++; + delayedIsAlterTableDone( + table, + rpc, + getDelayedIsAlterTableDoneCB(rpc, table, alterResp), + getDelayedIsTableDoneEB(rpc)); + } + return d; + }; + } + + /** + * Returns a callback to be called upon completion of an IsCreateTableDone RPC. + * If the table is fully created, triggers the provided rpc's callback chain + * with 'tableResp' as its value. Otherwise, sends another IsCreateTableDone + * RPC after sleeping. + *

+ * @param rpc RPC that initiated this sequence of operations + * @param table table identifier + * @param tableResp previously constructed KuduTable, if any + * @return callback that will eventually return 'tableResp' + */ + private Callback, IsCreateTableDoneResponse> getDelayedIsCreateTableDoneCB( + final KuduRpc rpc, + final TableIdentifierPB.Builder table, + final KuduTable tableResp) { + return resp -> { + // Store the Deferred locally; callback() below will reset it and we'd + // return a different, non-triggered Deferred. + Deferred d = rpc.getDeferred(); + if (resp.isDone()) { + rpc.callback(tableResp); + } else { + rpc.attempt++; + delayedIsCreateTableDone( + table, + rpc, + getDelayedIsCreateTableDoneCB(rpc, table, tableResp), + getDelayedIsTableDoneEB(rpc)); + } + return d; + }; + } + + /** + * Schedules a timer to send an IsCreateTableDone RPC to the master after + * sleeping for getSleepTimeForRpc() (based on the provided KuduRpc's number + * of attempts). When the master responds, the provided callback will be called. + *

+ * @param table table identifier + * @param rpc original KuduRpc that needs to access the table + * @param callback callback to call on completion + * @param errback errback to call if something goes wrong + */ + private void delayedIsCreateTableDone( + final TableIdentifierPB.Builder table, + final KuduRpc rpc, + final Callback, IsCreateTableDoneResponse> callback, + final Callback errback) { + final class RetryTimer implements TimerTask { + @Override + public void run(final Timeout timeout) { + doIsCreateTableDone(table, rpc).addCallbacks(callback, errback); + } + } + + long sleepTimeMillis = getSleepTimeForRpcMillis(rpc); + if (rpc.timeoutTracker.wouldSleepingTimeoutMillis(sleepTimeMillis)) { + tooManyAttemptsOrTimeout(rpc, null); + return; + } + newTimeout(timer, new RetryTimer(), sleepTimeMillis); + } + + /** + * Schedules a timer to send an IsAlterTableDone RPC to the master after + * sleeping for getSleepTimeForRpc() (based on the provided KuduRpc's number + * of attempts). When the master responds, the provided callback will be called. + *

+ * @param table table identifier + * @param rpc original KuduRpc that needs to access the table + * @param callback callback to call on completion + * @param errback errback to call if something goes wrong + */ + private void delayedIsAlterTableDone( + final TableIdentifierPB.Builder table, + final KuduRpc rpc, + final Callback, IsAlterTableDoneResponse> callback, + final Callback errback) { + final class RetryTimer implements TimerTask { + @Override + public void run(final Timeout timeout) { + doIsAlterTableDone(table, rpc).addCallbacks(callback, errback); + } + } + + long sleepTimeMillis = getSleepTimeForRpcMillis(rpc); + if (rpc.timeoutTracker.wouldSleepingTimeoutMillis(sleepTimeMillis)) { + tooManyAttemptsOrTimeout(rpc, null); + return; + } + newTimeout(timer, new RetryTimer(), sleepTimeMillis); + } + + private final class ReleaseMasterLookupPermit implements Callback { + @Override + public T call(final T arg) { + releaseMasterLookupPermit(); + return arg; + } + + @Override + public String toString() { + return "release master lookup permit"; + } + + /** + * Releases a master lookup permit that was acquired. + * See {@link AsyncKuduClient#acquireMasterLookupPermit}. + */ + private void releaseMasterLookupPermit() { + masterLookups.release(); + } + } + + long getSleepTimeForRpcMillis(KuduRpc rpc) { + int attemptCount = rpc.attempt; + if (attemptCount == 0) { + // If this is the first RPC attempt, don't sleep at all. + return 0; + } + // Randomized exponential backoff, truncated at 4096ms. + long sleepTime = (long)(Math.pow(2.0, Math.min(attemptCount, 12)) * + sleepRandomizer.nextDouble()); + if (LOG.isTraceEnabled()) { + LOG.trace("Going to sleep for {} at retry {}", sleepTime, rpc.attempt); + } + return sleepTime; + } + + /** + * Clears {@link #tableLocations} of the table's entries. + * + * This method makes the maps momentarily inconsistent, and should only be + * used when the {@code AsyncKuduClient} is in a steady state. + * @param tableId table for which we remove all cached tablet location and + * tablet client entries + */ + @InterfaceAudience.LimitedPrivate("Test") + void emptyTabletsCacheForTable(String tableId) { + tableLocations.remove(tableId); + } + + /** + * Checks whether or not an RPC can be retried once more + * @param rpc The RPC we're going to attempt to execute + * @return {@code true} if this RPC already had too many attempts, + * {@code false} otherwise (in which case it's OK to retry once more) + */ + private static boolean cannotRetryRequest(final KuduRpc rpc) { + return rpc.timeoutTracker.timedOut() || rpc.attempt > MAX_RPC_ATTEMPTS; + } + + /** + * Returns a {@link Deferred} containing an exception when an RPC couldn't + * succeed after too many attempts or if it already timed out. + * @param request The RPC that was retried too many times or timed out. + * @param cause What was cause of the last failed attempt, if known. + * You can pass {@code null} if the cause is unknown. 
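+   * @return the RPC's {@code Deferred}; by the time it is returned, its errback has
+   * already been invoked with a {@link NonRecoverableException} carrying the reason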
+ */ + static Deferred tooManyAttemptsOrTimeout(final KuduRpc request, + final KuduException cause) { + String message; + if (request.attempt > MAX_RPC_ATTEMPTS) { + message = "too many attempts: "; + } else { + message = "cannot complete before timeout: "; + } + Status statusTimedOut = Status.TimedOut(message + request); + LOG.debug("Cannot continue with RPC because of: {}", statusTimedOut); + Deferred d = request.getDeferred(); + request.errback(new NonRecoverableException(statusTimedOut, cause)); + return d; + } + + /** + * Sends a getTableLocations RPC to the master to find the table's tablets. + * @param table table to lookup + * @param partitionKey can be null, if not we'll find the exact tablet that contains it + * @param fetchBatchSize the number of tablets to fetch per round trip from the master + * @param parentRpc RPC that prompted a master lookup, can be null + * @return Deferred to track the progress + */ + private Deferred locateTablet(KuduTable table, + byte[] partitionKey, + int fetchBatchSize, + KuduRpc parentRpc) { + boolean hasPermit = acquireMasterLookupPermit(); + String tableId = table.getTableId(); + if (!hasPermit) { + // If we failed to acquire a permit, it's worth checking if someone + // looked up the tablet we're interested in. Every once in a while + // this will save us a Master lookup. + TableLocationsCache.Entry entry = getTableLocationEntry(tableId, partitionKey); + if (entry != null && !entry.isNonCoveredRange() && + entry.getTablet().getLeaderServerInfo() != null) { + return Deferred.fromResult(null); // Looks like no lookup needed. + } + } + + // If we know this is going to the master, check the master consensus + // configuration (as specified by 'masterAddresses' field) to determine and + // cache the current leader. + Deferred d; + if (isMasterTable(tableId)) { + d = getMasterTableLocationsPB(parentRpc); + } else { + long timeoutMillis = parentRpc == null ? defaultAdminOperationTimeoutMs : + parentRpc.timeoutTracker.getMillisBeforeTimeout(); + // Leave the end of the partition key range empty in order to pre-fetch tablet locations. + GetTableLocationsRequest rpc = + new GetTableLocationsRequest(masterTable, + partitionKey, + null, + tableId, + fetchBatchSize, + timer, + timeoutMillis); + rpc.setParentRpc(parentRpc); + d = sendRpcToTablet(rpc); + } + d.addCallback(new MasterLookupCB(table, partitionKey, fetchBatchSize)); + if (hasPermit) { + d.addBoth(new ReleaseMasterLookupPermit<>()); + } + return d; + } + + /** + * Update the master config: send RPCs to all config members, use the returned data to + * fill a {@link Master.GetTabletLocationsResponsePB} object. + * @return An initialized Deferred object to hold the response. + */ + Deferred getMasterTableLocationsPB(KuduRpc parentRpc) { + // TODO(todd): stop using this 'masterTable' hack. + return ConnectToCluster.run(masterTable, masterAddresses, parentRpc, + defaultAdminOperationTimeoutMs, Connection.CredentialsPolicy.ANY_CREDENTIALS).addCallback( + resp -> { + if (resp.getConnectResponse().hasAuthnToken()) { + // If the response has security info, adopt it. 
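+              // Caching the token in the security context lets subsequent connections
+              // authenticate with the token instead of the original (e.g. Kerberos)
+              // credentials.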
+ securityContext.setAuthenticationToken(resp.getConnectResponse().getAuthnToken()); + } + List caCerts = resp.getConnectResponse().getCaCertDerList(); + if (!caCerts.isEmpty() && (securityContext.getJsonWebToken() == null || + !securityContext.getJsonWebToken().hasJwtData())) { + try { + securityContext.trustCertificates(caCerts); + } catch (CertificateException e) { + LOG.warn("Ignoring invalid CA cert from leader {}: {}", + resp.getLeaderHostAndPort(), e.getMessage()); + } + } + + HiveMetastoreConfig config = null; + Master.ConnectToMasterResponsePB respPb = resp.getConnectResponse(); + if (respPb.hasHmsConfig()) { + Master.HiveMetastoreConfig metastoreConf = respPb.getHmsConfig(); + config = new HiveMetastoreConfig(metastoreConf.getHmsUris(), + metastoreConf.getHmsSaslEnabled(), + metastoreConf.getHmsUuid()); + } + synchronized (AsyncKuduClient.this) { + hiveMetastoreConfig = config; + location = respPb.getClientLocation(); + clusterId = respPb.getClusterId(); + } + + hasConnectedToMaster = true; + + // Translate the located master into a TableLocations + // since the rest of our locations caching code expects this type. + return resp.getAsTableLocations(); + }); + } + + /** + * Get all or some tablets for a given table. This may query the master multiple times if there + * are a lot of tablets. + * This method blocks until it gets all the tablets. + * @param table the table to locate tablets from + * @param startPartitionKey where to start in the table, pass null to start at the beginning + * @param endPartitionKey where to stop in the table, pass null to get all the tablets until the + * end of the table + * @param fetchBatchSize the number of tablets to fetch per round trip from the master + * @param deadline deadline in milliseconds for this method to finish + * @return a list of the tablets in the table, which can be queried for metadata about + * each tablet + * @throws Exception if anything went wrong + */ + List syncLocateTable(KuduTable table, + byte[] startPartitionKey, + byte[] endPartitionKey, + int fetchBatchSize, + long deadline) throws Exception { + return locateTable(table, startPartitionKey, endPartitionKey, fetchBatchSize, deadline).join(); + } + + private Deferred> loopLocateTable(final KuduTable table, + final byte[] startPartitionKey, + final byte[] endPartitionKey, + final int fetchBatchSize, + final List ret, + final TimeoutTracker timeoutTracker) { + // We rely on the keys initially not being empty. + Preconditions.checkArgument(startPartitionKey == null || startPartitionKey.length > 0, + "use null for unbounded start partition key"); + Preconditions.checkArgument(endPartitionKey == null || endPartitionKey.length > 0, + "use null for unbounded end partition key"); + + // The next partition key to look up. If null, then it represents + // the minimum partition key, If empty, it represents the maximum key. + byte[] partitionKey = startPartitionKey; + String tableId = table.getTableId(); + + // Continue while the partition key is the minimum, or it is not the maximum + // and it is less than the end partition key. + while (partitionKey == null || + (partitionKey.length > 0 && + (endPartitionKey == null || Bytes.memcmp(partitionKey, endPartitionKey) < 0))) { + byte[] key = partitionKey == null ? 
EMPTY_ARRAY : partitionKey; + TableLocationsCache.Entry entry = getTableLocationEntry(tableId, key); + + if (entry != null) { + if (!entry.isNonCoveredRange()) { + ret.add(new LocatedTablet(entry.getTablet())); + } + partitionKey = entry.getUpperBoundPartitionKey(); + continue; + } + + if (timeoutTracker.timedOut()) { + Status statusTimedOut = Status.TimedOut("Took too long getting the list of tablets, " + + timeoutTracker); + return Deferred.fromError(new NonRecoverableException(statusTimedOut)); + } + + // If the partition key location isn't cached, and the request hasn't timed out, + // then kick off a new tablet location lookup and try again when it completes. + // When lookup completes, the tablet (or non-covered range) for the next + // partition key will be located and added to the client's cache. + final byte[] lookupKey = partitionKey; + + // Build a fake RPC to encapsulate and propagate the timeout. There's no actual "RPC" to send. + KuduRpc fakeRpc = buildFakeRpc("loopLocateTable", + null, + timeoutTracker.getMillisBeforeTimeout()); + + return locateTablet(table, key, fetchBatchSize, fakeRpc).addCallbackDeferring( + new Callback>, GetTableLocationsResponsePB>() { + @Override + public Deferred> call(GetTableLocationsResponsePB resp) { + return loopLocateTable(table, + lookupKey, + endPartitionKey, + fetchBatchSize, + ret, + timeoutTracker); + } + + @Override + public String toString() { + return "LoopLocateTableCB"; + } + }); + } + + return Deferred.fromResult(ret); + } + + /** + * Get all or some tablets for a given table. This may query the master multiple times if there + * are a lot of tablets. + * @param table the table to locate tablets from + * @param startPartitionKey where to start in the table, pass null to start at the beginning + * @param endPartitionKey where to stop in the table, pass null to get all the tablets until the + * end of the table + * @param fetchBatchSize the number of tablets to fetch per round trip from the master + * @param deadline max time spent in milliseconds for the deferred result of this method to + * get called back, if deadline is reached, the deferred result will get erred back + * @return a deferred object that yields a list of the tablets in the table, which can be queried + * for metadata about each tablet + */ + Deferred> locateTable(final KuduTable table, + final byte[] startPartitionKey, + final byte[] endPartitionKey, + int fetchBatchSize, + long deadline) { + final List ret = Lists.newArrayList(); + final TimeoutTracker timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(deadline); + return loopLocateTable(table, + startPartitionKey, + endPartitionKey, + fetchBatchSize, + ret, + timeoutTracker); + } + + /** + * Sends a splitKeyRange RPC to split the tablet's primary key range into smaller ranges. + * This RPC doesn't change the layout of the tablet. + * @param table table to lookup + * @param startPrimaryKey the primary key to begin splitting at (inclusive), pass null to + * start splitting at the beginning of the tablet + * @param endPrimaryKey the primary key to stop splitting at (exclusive), pass null to + * stop splitting at the end of the tablet + * @param partitionKey the partition key of the tablet to find + * @param splitSizeBytes the size of the data in each key range. + * This is a hint: The tablet server may return a key range + * larger or smaller than this value. 
+ * @param parentRpc RPC that prompted the split key range request, can be null + * @return Deferred to track the progress + */ + private Deferred getTabletKeyRanges(final KuduTable table, + final byte[] startPrimaryKey, + final byte[] endPrimaryKey, + final byte[] partitionKey, + long splitSizeBytes, + KuduRpc parentRpc) { + long timeoutMillis = parentRpc == null ? defaultAdminOperationTimeoutMs : + parentRpc.timeoutTracker.getMillisBeforeTimeout(); + + SplitKeyRangeRequest rpc = + new SplitKeyRangeRequest(table, + startPrimaryKey, + endPrimaryKey, + partitionKey, + splitSizeBytes, + timer, + timeoutMillis); + rpc.setParentRpc(parentRpc); + return sendRpcToTablet(rpc); + } + + /** + * Get all or some key range for a given table. This may query the master multiple times if there + * are a lot of tablets, and query each tablet to split the tablet's primary key range into + * smaller ranges. This doesn't change the layout of the tablet. + * @param table the table to get key ranges from + * @param startPrimaryKey the primary key to begin splitting at (inclusive), pass null to + * start splitting at the beginning of the tablet + * @param endPrimaryKey the primary key to stop splitting at (exclusive), pass null to + * stop splitting at the end of the tablet + * @param startPartitionKey where to start in the table, pass null to start at the beginning + * @param endPartitionKey where to stop in the table, pass null to get all the tablets until the + * end of the table + * @param fetchBatchSize the number of tablets to fetch per round trip from the master + * @param splitSizeBytes the size of the data in each key range. + * This is a hint: The tablet server may return the size of key range + * larger or smaller than this value. If unset or <= 0, the key range + * includes all the data of the tablet. + * @param deadline deadline in milliseconds for this method to finish + * @return a {@code Deferred} object that yields a list of the key ranges in the table + */ + Deferred> getTableKeyRanges(final KuduTable table, + final byte[] startPrimaryKey, + final byte[] endPrimaryKey, + final byte[] startPartitionKey, + final byte[] endPartitionKey, + int fetchBatchSize, + long splitSizeBytes, + long deadline) { + final TimeoutTracker timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(deadline); + + Callback>, List> locateTabletCB = tablets -> { + if (splitSizeBytes <= 0) { + final List keyRanges = Lists.newArrayList(); + for (LocatedTablet tablet : tablets) { + keyRanges.add(new KeyRange(tablet, startPrimaryKey, endPrimaryKey, -1)); + } + return Deferred.fromResult(keyRanges); + } + List>> deferreds = new java.util.ArrayList<>(); + for (LocatedTablet tablet : tablets) { + // Build a fake RPC to encapsulate and propagate the timeout. + // There's no actual "RPC" to send. 
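+        // The fake RPC carries whatever time remains on the overall deadline, so each
+        // per-tablet SplitKeyRange request below is bounded by that same deadline.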
+ KuduRpc fakeRpc = buildFakeRpc("getTableKeyRanges", + null, + timeoutTracker.getMillisBeforeTimeout()); + deferreds.add(getTabletKeyRanges(table, + startPrimaryKey, + endPrimaryKey, + tablet.getPartition().getPartitionKeyStart(), + splitSizeBytes, + fakeRpc) + .addCallbackDeferring(resp -> { + final List ranges = Lists.newArrayList(); + LOG.debug("Key ranges for {}", table.getName()); + for (Common.KeyRangePB pb : resp.getKeyRanges()) { + KeyRange newRange = new KeyRange(tablet, + pb.getStartPrimaryKey().toByteArray(), + pb.getStopPrimaryKey().toByteArray(), + pb.getSizeBytesEstimates()); + ranges.add(newRange); + LOG.debug(newRange.toString()); + } + return Deferred.fromResult(ranges); + })); + } + // Must preserve the order. + return Deferred.groupInOrder(deferreds).addCallbackDeferring(rangeLists -> { + final List ret = Lists.newArrayList(); + for (List ranges : rangeLists) { + ret.addAll(ranges); + } + return Deferred.fromResult(ret); + }); + }; + + final List tablets = Lists.newArrayList(); + return loopLocateTable(table, + startPartitionKey, + endPartitionKey, + fetchBatchSize, + tablets, + timeoutTracker) + .addCallbackDeferring(locateTabletCB); + } + + /** + * We're handling a tablet server that's telling us it doesn't have the tablet we're asking for. + * We're in the context of decode() meaning we need to either callback or retry later. + */ + void handleTabletNotFound(final KuduRpc rpc, KuduException ex, ServerInfo info) { + invalidateTabletCache(rpc.getTablet(), info, ex.getMessage()); + handleRetryableError(rpc, ex); + } + + /** + * A tablet server is letting us know that it isn't the specified tablet's leader in response + * a RPC, so we need to demote it and retry. + */ + void handleNotLeader(final KuduRpc rpc, KuduException ex, ServerInfo info) { + rpc.getTablet().demoteLeader(info.getUuid()); + handleRetryableError(rpc, ex); + } + + void handleRetryableError(final KuduRpc rpc, KuduException ex) { + // TODO we don't always need to sleep, maybe another replica can serve this RPC. + // We don't care about the returned Deferred in this case, since we're not in a context where + // we're eventually returning a Deferred. + delayedSendRpcToTablet(rpc, ex); + } + + /** + * Same as {@link #handleRetryableError(KuduRpc, KuduException)}, but without the delay before + * retrying the RPC. + * + * @param rpc the RPC to retry + * @param ex the exception which lead to the attempt of RPC retry + */ + void handleRetryableErrorNoDelay(final KuduRpc rpc, KuduException ex) { + if (cannotRetryRequest(rpc)) { + tooManyAttemptsOrTimeout(rpc, ex); + return; + } + sendRpcToTablet(rpc); + } + + /** + * Handle an RPC failed due to invalid authn token error. In short, connect to the Kudu cluster + * to acquire a new authentication token and retry the RPC once a new authentication token + * is put into the {@link #securityContext}. + * + * @param rpc the RPC which failed with an invalid authn token + */ + void handleInvalidAuthnToken(KuduRpc rpc) { + // TODO(awong): plumb the offending KuduException into the reacquirer. + tokenReacquirer.handleAuthnTokenExpiration(rpc); + } + + /** + * Handle an RPC that failed due to an invalid authorization token error. The + * RPC will be retried after fetching a new authz token. 
+ * + * @param rpc the RPC that failed with an invalid authz token + * @param ex the KuduException that led to this handling + */ + void handleInvalidAuthzToken(KuduRpc rpc, KuduException ex) { + authzTokenCache.retrieveAuthzToken(rpc, ex); + } + + /** + * Gets an authorization token for the given table from the cache, or nullptr + * if none exists. + * + * @param tableId the table ID for which to get an authz token + * @return a signed authz token for the table + */ + SignedTokenPB getAuthzToken(String tableId) { + return authzTokenCache.get(tableId); + } + + /** + * This method puts RPC on hold for a time interval determined by + * {@link #getSleepTimeForRpcMillis(KuduRpc)}. If the RPC is out of + * time/retries, its errback is called immediately. + * + * @param rpc the RPC to retry later + * @param ex the reason why we need to retry + * @return a Deferred object to use if this method is called inline with the user's original + * attempt to send the RPC. Can be ignored in any other context that doesn't need to return a + * Deferred back to the user. + */ + private Deferred delayedSendRpcToTablet(final KuduRpc rpc, KuduException ex) { + assert (ex != null); + Status reasonForRetry = ex.getStatus(); + rpc.addTrace( + new RpcTraceFrame.RpcTraceFrameBuilder( + rpc.method(), + RpcTraceFrame.Action.SLEEP_THEN_RETRY) + .callStatus(reasonForRetry) + .build()); + + long sleepTime = getSleepTimeForRpcMillis(rpc); + if (cannotRetryRequest(rpc) || + rpc.timeoutTracker.wouldSleepingTimeoutMillis(sleepTime)) { + // Don't let it retry. + return tooManyAttemptsOrTimeout(rpc, ex); + } + // Here we simply retry the RPC later. We might be doing this along with a lot of other RPCs + // in parallel. Asynchbase does some hacking with a "probe" RPC while putting the other ones + // on hold but we won't be doing this for the moment. Regions in HBase can move a lot, + // we're not expecting this in Kudu. + newTimeout(timer, timeout -> sendRpcToTablet(rpc), sleepTime); + return rpc.getDeferred(); + } + + /** + * Remove the tablet server from the RemoteTablet's locations. Right now nothing is removing + * the tablet itself from the caches. + */ + private void invalidateTabletCache(RemoteTablet tablet, + ServerInfo info, + String errorMessage) { + final String uuid = info.getUuid(); + LOG.info("Invalidating location {} for tablet {}: {}", + info, tablet.getTabletId(), errorMessage); + // TODO(ghenke): Should this also remove the related replica? + // As it stands there can be a replica with a missing tablet server. + tablet.removeTabletClient(uuid); + } + + /** + * Translate master-provided information {@link Master.TSInfoPB} on a tablet server into internal + * {@link ServerInfo} representation. + * + * @param tsInfoPB master-provided information for the tablet server + * @return an object that contains all the server's information + * @throws UnknownHostException if we cannot resolve the tablet server's IP address + */ + private ServerInfo resolveTS(Master.TSInfoPB tsInfoPB) throws UnknownHostException { + final List addresses = tsInfoPB.getRpcAddressesList(); + final String uuid = tsInfoPB.getPermanentUuid().toStringUtf8(); + if (addresses.isEmpty()) { + LOG.warn("Received a tablet server with no addresses, UUID: {}", uuid); + return null; + } + + // From meta_cache.cc: + // TODO: If the TS advertises multiple host/ports, pick the right one + // based on some kind of policy. For now just use the first always. 
+ final HostAndPort hostPort = ProtobufHelper.hostAndPortFromPB(addresses.get(0)); + final InetAddress inetAddress = NetUtil.getInetAddress(hostPort.getHost()); + if (inetAddress == null) { + throw new UnknownHostException( + "Failed to resolve the IP of `" + addresses.get(0).getHost() + "'"); + } + return new ServerInfo(uuid, hostPort, inetAddress, tsInfoPB.getLocation()); + } + + /** Callback executed when a master lookup completes. */ + private final class MasterLookupCB implements Callback { + final KuduTable table; + private final byte[] partitionKey; + private final int requestedBatchSize; + + MasterLookupCB(KuduTable table, byte[] partitionKey, int requestedBatchSize) { + this.table = table; + this.partitionKey = partitionKey; + this.requestedBatchSize = requestedBatchSize; + } + + @Override + public Object call(final GetTableLocationsResponsePB response) { + if (response.hasError()) { + Status status = Status.fromMasterErrorPB(response.getError()); + return new NonRecoverableException(status); + } else { + try { + discoverTablets(table, + partitionKey, + requestedBatchSize, + response.getTabletLocationsList(), + response.getTsInfosList(), + response.getTtlMillis()); + } catch (KuduException e) { + return e; + } + } + return null; + } + + @Override + public String toString() { + return "get tablet locations from the master for table " + table.getName(); + } + } + + private boolean acquireMasterLookupPermit() { + try { + // With such a low timeout, the JVM may chose to spin-wait instead of + // de-scheduling the thread (and causing context switches and whatnot). + return masterLookups.tryAcquire(5, MILLISECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); // Make this someone else's problem. + return false; + } + } + + /** + * Makes discovered tablet locations visible in the client's caches. + * @param table the table which the locations belong to + * @param requestPartitionKey the partition key of the table locations request + * @param requestedBatchSize the number of tablet locations requested from the master in the + * original request + * @param locations the discovered locations + * @param tsInfosList a list of ts info that the replicas in 'locations' references by index. + * @param ttl the ttl of the locations + */ + @InterfaceAudience.LimitedPrivate("Test") + void discoverTablets(KuduTable table, + byte[] requestPartitionKey, + int requestedBatchSize, + List locations, + List tsInfosList, + long ttl) throws KuduException { + String tableId = table.getTableId(); + String tableName = table.getName(); + + TableLocationsCache locationsCache = getOrCreateTableLocationsCache(tableId); + + // Build the list of discovered remote tablet instances. If we have + // already discovered the tablet, its locations are refreshed. + int numTsInfos = tsInfosList.size(); + List tablets = new ArrayList<>(locations.size()); + for (TabletLocationsPB tabletPb : locations) { + + List lookupExceptions = new ArrayList<>(tabletPb.getInternedReplicasCount()); + List servers = new ArrayList<>(tabletPb.getInternedReplicasCount()); + List dnsFailedServers = new ArrayList<>(tabletPb.getInternedReplicasCount()); + // Lambda that does the common handling of a ts info. 
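+      // It resolves the TSInfoPB into a ServerInfo (including the DNS lookup); on
+      // failure, the exception and the offending host are recorded so that the
+      // corresponding replica can be skipped below.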
+ Consumer updateServersAndCollectExceptions = tsInfo -> { + try { + ServerInfo serverInfo = resolveTS(tsInfo); + if (serverInfo != null) { + servers.add(serverInfo); + } + } catch (UnknownHostException ex) { + lookupExceptions.add(ex); + final List addresses = tsInfo.getRpcAddressesList(); + // Here only add the first address because resolveTS only resolves the first one. + dnsFailedServers.add(addresses.get(0).getHost()); + } + }; + + List replicas = new ArrayList<>(); + // Handle "old-style" non-interned replicas. It's used for backward compatibility. + for (TabletLocationsPB.DEPRECATED_ReplicaPB replica : tabletPb.getDEPRECATEDReplicasList()) { + TSInfoPB tsInfo = replica.getTsInfo(); + updateServersAndCollectExceptions.accept(tsInfo); + String tsHost = tsInfo.getRpcAddressesList().isEmpty() ? + null : tsInfo.getRpcAddressesList().get(0).getHost(); + if (tsHost == null || dnsFailedServers.contains(tsHost)) { + // skip the DNS failed tserver + continue; + } + Integer tsPort = tsInfo.getRpcAddressesList().isEmpty() ? + null : tsInfo.getRpcAddressesList().get(0).getPort(); + String dimensionLabel = replica.hasDimensionLabel() ? replica.getDimensionLabel() : null; + replicas.add(new LocatedTablet.Replica(tsHost, tsPort, replica.getRole(), dimensionLabel)); + } + // Handle interned replicas. + for (TabletLocationsPB.InternedReplicaPB replica : tabletPb.getInternedReplicasList()) { + int tsInfoIdx = replica.getTsInfoIdx(); + if (tsInfoIdx >= numTsInfos) { + lookupExceptions.add(new NonRecoverableException(Status.Corruption( + String.format("invalid response from master: referenced tablet idx %d but only %d " + + "present", tsInfoIdx, numTsInfos)))); + continue; + } + TSInfoPB tsInfo = tsInfosList.get(tsInfoIdx); + updateServersAndCollectExceptions.accept(tsInfo); + String tsHost = tsInfo.getRpcAddressesList().isEmpty() ? + null : tsInfo.getRpcAddressesList().get(0).getHost(); + if (tsHost == null || dnsFailedServers.contains(tsHost)) { + // skip the DNS failed tserver + continue; + } + Integer tsPort = tsInfo.getRpcAddressesList().isEmpty() ? + null : tsInfo.getRpcAddressesList().get(0).getPort(); + String dimensionLabel = replica.hasDimensionLabel() ? replica.getDimensionLabel() : null; + replicas.add(new LocatedTablet.Replica(tsHost, tsPort, replica.getRole(), dimensionLabel)); + } + + if (!lookupExceptions.isEmpty() && + lookupExceptions.size() == tabletPb.getInternedReplicasCount()) { + Status statusIOE = Status.IOError("Couldn't find any valid locations, exceptions: " + + lookupExceptions); + throw new NonRecoverableException(statusIOE); + } + + RemoteTablet rt = new RemoteTablet(tableId, + tabletPb.getTabletId().toStringUtf8(), + ProtobufHelper.pbToPartition(tabletPb.getPartition()), + replicas, + servers); + + LOG.debug("Learned about tablet {} for table '{}' with partition {}", + rt.getTabletId(), tableName, rt.getPartition()); + tablets.add(rt); + } + + // Give the locations to the tablet location cache for the table, so that it + // can cache them and discover non-covered ranges. + locationsCache.cacheTabletLocations(tablets, requestPartitionKey, requestedBatchSize, ttl); + + // Now test if we found the tablet we were looking for. If so, RetryRpcCB will retry the RPC + // right away. If not, we throw an exception that RetryRpcErrback will understand as needing to + // sleep before retrying. 
+ TableLocationsCache.Entry entry = locationsCache.get(requestPartitionKey); + if (entry != null && !entry.isNonCoveredRange() && + entry.getTablet().getLeaderServerInfo() == null) { + throw new NoLeaderFoundException( + Status.NotFound("Tablet " + entry + " doesn't have a leader")); + } + } + + TableLocationsCache getOrCreateTableLocationsCache(String tableId) { + // Doing a get first instead of putIfAbsent to avoid creating unnecessary + // table locations caches because in the most common case the table should + // already be present. + TableLocationsCache locationsCache = tableLocations.get(tableId); + if (locationsCache == null) { + locationsCache = new TableLocationsCache(); + TableLocationsCache existingLocationsCache = + tableLocations.putIfAbsent(tableId, locationsCache); + if (existingLocationsCache != null) { + locationsCache = existingLocationsCache; + } + } + return locationsCache; + } + + /** + * Gets the tablet location cache entry for the tablet in the table covering a partition key. + * @param tableId the table + * @param partitionKey the partition key of the tablet to find + * @return a tablet location cache entry, or null if the partition key has not been discovered + */ + TableLocationsCache.Entry getTableLocationEntry(String tableId, byte[] partitionKey) { + TableLocationsCache cache = tableLocations.get(tableId); + if (cache == null) { + return null; + } + return cache.get(partitionKey); + } + + enum LookupType { + // The lookup should only return a tablet which actually covers the + // requested partition key. + POINT, + // The lookup should return the next tablet after the requested + // partition key if the requested key does not fall within a covered + // range. + LOWER_BOUND + } + + /** + * Returns a deferred containing the located tablet which covers the partition key in the table. + * @param table the table + * @param partitionKey the partition key of the tablet to look up in the table + * @param lookupType the type of lookup to use + * @param timeoutMs timeout in milliseconds for this lookup to finish + * @return a deferred containing the located tablet + */ + Deferred getTabletLocation(final KuduTable table, + final byte[] partitionKey, + final LookupType lookupType, + long timeoutMs) { + + // Locate the tablet at the partition key by locating tablets between + // the partition key (inclusive), and the incremented partition key (exclusive). + // We expect this to return at most a single tablet (checked below). + byte[] startPartitionKey; + byte[] endPartitionKey; + if (partitionKey.length == 0) { + startPartitionKey = null; + endPartitionKey = new byte[]{0x00}; + } else { + startPartitionKey = partitionKey; + endPartitionKey = Arrays.copyOf(partitionKey, partitionKey.length + 1); + } + + final TimeoutTracker timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(timeoutMs); + Deferred> locatedTablets = locateTable( + table, startPartitionKey, endPartitionKey, FETCH_TABLETS_PER_POINT_LOOKUP, timeoutMs); + + // Then pick out the single tablet result from the list. + return locatedTablets.addCallbackDeferring(tablets -> { + Preconditions.checkArgument(tablets.size() <= 1, + "found more than one tablet for a single partition key"); + if (tablets.isEmpty()) { + // Most likely this indicates a non-covered range, but since this + // could race with an alter table partitioning operation (which + // clears the local table locations cache), we check again. 
+ TableLocationsCache.Entry entry = getTableLocationEntry(table.getTableId(), + partitionKey); + + if (entry == null) { + // This should be extremely rare, but a potential source of tight loops. + LOG.debug("Table location expired before it could be processed; retrying."); + return Deferred.fromError(new RecoverableException(Status.NotFound( + "Table location expired before it could be processed"))); + } + if (entry.isNonCoveredRange()) { + if (lookupType == LookupType.POINT || entry.getUpperBoundPartitionKey().length == 0) { + return Deferred.fromError( + new NonCoveredRangeException(entry.getLowerBoundPartitionKey(), + entry.getUpperBoundPartitionKey())); + } + // This is a LOWER_BOUND lookup, get the tablet location from the upper bound key + // of the non-covered range to return the next valid tablet location. + return getTabletLocation(table, + entry.getUpperBoundPartitionKey(), + LookupType.POINT, + timeoutTracker.getMillisBeforeTimeout()); + } + return Deferred.fromResult(new LocatedTablet(entry.getTablet())); + } + return Deferred.fromResult(tablets.get(0)); + }); + } + + /** + * Invokes {@link #shutdown()} and waits. This method returns void, so consider invoking + * {@link #shutdown()} directly if there's a need to handle dangling RPCs. + * + * @throws Exception if an error happens while closing the connections + */ + @Override + public void close() throws Exception { + shutdown().join(); + } + + /** + * Performs a graceful shutdown of this instance. + *

+ * <ul>
+ *   <li>{@link AsyncKuduSession#flush Flushes} all buffered edits.</li>
+ *   <li>Cancels all the other requests.</li>
+ *   <li>Terminates all connections.</li>
+ *   <li>Releases all other resources.</li>
+ * </ul>
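+ * <p>
+ * As a rough usage sketch (the {@code client} variable is a placeholder and error
+ * handling is elided), a caller that wants a clean shutdown might do:
+ * <pre>{@code
+ * // Flushes sessions, terminates connections and releases resources.
+ * client.shutdown().join();
+ * }</pre>
+ * <p>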
+ * Not calling this method before losing the last reference to this + * instance may result in data loss and other unwanted side effects. + * + * @return A {@link Deferred}, whose callback chain will be invoked once all + * of the above have been done. If this callback chain doesn't fail, then + * the clean shutdown will be successful, and all the data will be safe on + * the Kudu side. In case of a failure (the "errback" is invoked) you will have + * to open a new AsyncKuduClient if you want to retry those operations. + * The Deferred doesn't actually hold any content. + */ + public Deferred> shutdown() { + checkIsClosed(); + closed = true; + + // 3. Release all other resources. + final class ReleaseResourcesCB implements Callback, ArrayList> { + @Override + @SuppressWarnings("FutureReturnValueIgnored") + public ArrayList call(final ArrayList arg) throws InterruptedException { + LOG.debug("Releasing all remaining resources"); + timer.stop(); + // AbstractEventExecutor sets a default `quietPeriod` of 2 seconds and a 15 second timeout. + // We disable to quiet period to prevent resource leaks due to clients running forever. + bootstrap.config().group().shutdownGracefully(0, 15, TimeUnit.SECONDS); + return arg; + } + + @Override + public String toString() { + return "release resources callback"; + } + } + + // 2. Terminate all connections. + final class DisconnectCB implements Callback>, + ArrayList>> { + @Override + public Deferred> call(ArrayList> ignoredResponses) { + return connectionCache.disconnectEverything().addCallback(new ReleaseResourcesCB()); + } + + @Override + public String toString() { + return "disconnect callback"; + } + } + + // 1. Flush everything. + // Notice that we do not handle the errback, if there's an exception it will come straight out. + return closeAllSessions().addCallbackDeferring(new DisconnectCB()); + } + + // Create a new transactional session in the context of the transaction + // with the specified identifier. + AsyncKuduSession newTransactionalSession(long txnId) { + checkIsClosed(); + AsyncKuduSession session = new AsyncKuduSession(this, txnId); + synchronized (sessions) { + sessions.add(session); + } + return session; + } + + private void checkIsClosed() { + if (closed) { + throw new IllegalStateException("Cannot proceed, the client has already been closed"); + } + } + + private Deferred>> closeAllSessions() { + // We create a copy because AsyncKuduSession.close will call removeSession which would get us a + // concurrent modification during the iteration. + Set copyOfSessions; + synchronized (sessions) { + copyOfSessions = new HashSet<>(sessions); + } + if (copyOfSessions.isEmpty()) { + return Deferred.fromResult(null); + } + // Guaranteed that we'll have at least one session to close. + List>> deferreds = new ArrayList<>(copyOfSessions.size()); + for (AsyncKuduSession session : copyOfSessions ) { + deferreds.add(session.close()); + } + + return Deferred.group(deferreds); + } + + @SuppressWarnings("ReferenceEquality") + private static boolean isMasterTable(String tableId) { + // Checking that it's the same instance so there's absolutely no chance of confusing the master + // 'table' for a user one. + return MASTER_TABLE_NAME_PLACEHOLDER == tableId; + } + + /** + * Utility function to register a timeout task 'task' on timer 'timer' that + * will fire after 'timeoutMillis' milliseconds. Returns a handle to the + * scheduled timeout, which can be used to cancel the task and release its + * resources. 
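+ * <p>
+ * A minimal illustration of the intended call pattern (the task and the timeout
+ * value below are placeholders):
+ * <pre>{@code
+ * Timeout handle = newTimeout(timer, task, 5000);
+ * if (handle != null) {
+ *   // Cancel the task if it is no longer needed, releasing its resources.
+ *   handle.cancel();
+ * }
+ * }</pre>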
+ * @param timer the timer on which the task is scheduled + * @param task the task that will be run when the timeout hits + * @param timeoutMillis the timeout, in milliseconds + * @return a handle to the scheduled timeout + */ + static Timeout newTimeout(final Timer timer, + final TimerTask task, + final long timeoutMillis) { + Preconditions.checkNotNull(timer); + try { + return timer.newTimeout(task, timeoutMillis, MILLISECONDS); + } catch (IllegalStateException e) { + // This can happen if the timer fires just before shutdown() + // is called from another thread, and due to how threads get + // scheduled we tried to call newTimeout() after timer.stop(). + LOG.warn("Failed to schedule timer. Ignore this if we're shutting down.", e); + } + return null; + } + + /** + * @return copy of the current TabletClients list + */ + @InterfaceAudience.LimitedPrivate("Test") + List getConnectionListCopy() { + return connectionCache.getConnectionListCopy(); + } + + /** + * Sends a request to the master to check if the cluster supports ignore operations, including + * InsertIgnore, UpdateIgnore and DeleteIgnore operations. + * @return true if the cluster supports ignore operations + */ + @InterfaceAudience.Private + public Deferred supportsIgnoreOperations() { + PingRequest ping = PingRequest.makeMasterPingRequest( + this.masterTable, timer, defaultAdminOperationTimeoutMs); + ping.addRequiredFeature(Master.MasterFeatures.IGNORE_OPERATIONS_VALUE); + Deferred response = sendRpcToTablet(ping); + return AsyncUtil.addBoth(response, new PingSupportsFeatureCallback()); + } + + private static final class PingSupportsFeatureCallback implements Callback { + @Override + public Boolean call(final Object resp) { + if (resp instanceof Exception) { + // The server returns an RpcRemoteException when the required feature is not supported. + // The exception should have an ERROR_INVALID_REQUEST error code and at least one + // unsupported feature flag. + if (resp instanceof RpcRemoteException && + ((RpcRemoteException) resp).getErrPB().getCode() == ERROR_INVALID_REQUEST && + ((RpcRemoteException) resp).getErrPB().getUnsupportedFeatureFlagsCount() >= 1) { + return false; + } + throw new IllegalStateException((Exception) resp); + } + return true; + } + + @Override + public String toString() { + return "ping supports ignore operations"; + } + } + + public enum EncryptionPolicy { + // Optional, it uses encrypted connection if the server supports it, + // but it can connect to insecure servers too. + OPTIONAL, + // Only connects to remote servers that support encryption, fails + // otherwise. It can connect to insecure servers only locally. + REQUIRED_REMOTE, + // Only connects to any server, including on the loopback interface, + // that support encryption, fails otherwise. + REQUIRED, + } + + /** + * Builder class to use in order to connect to Kudu. + * All the parameters beyond those in the constructors are optional. 
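+ * <p>
+ * A minimal sketch of typical usage (the master address and timeout value below
+ * are placeholders, not recommendations):
+ * <pre>{@code
+ * AsyncKuduClient client = new AsyncKuduClient.AsyncKuduClientBuilder("master.example.com:7051")
+ *     .defaultOperationTimeoutMs(60000)
+ *     .build();
+ * }</pre>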
+ */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static final class AsyncKuduClientBuilder { + private static final int DEFAULT_MASTER_PORT = 7051; + private static final int DEFAULT_WORKER_COUNT = 2 * Runtime.getRuntime().availableProcessors(); + + private final List masterAddresses; + private long defaultAdminOperationTimeoutMs = DEFAULT_OPERATION_TIMEOUT_MS; + private long defaultOperationTimeoutMs = DEFAULT_OPERATION_TIMEOUT_MS; + private long defaultNegotiationTimeoutMs = DEFAULT_NEGOTIATION_TIMEOUT_MS; + + private final HashedWheelTimer timer = new HashedWheelTimer( + new ThreadFactoryBuilder().setDaemon(true).build(), 20, MILLISECONDS); + private Executor workerExecutor; + private int workerCount = DEFAULT_WORKER_COUNT; + private boolean statisticsDisabled = false; + private String saslProtocolName = "kudu"; + private boolean requireAuthentication = false; + private EncryptionPolicy encryptionPolicy = EncryptionPolicy.OPTIONAL; + + /** + * Creates a new builder for a client that will connect to the specified masters. + * @param masterAddresses comma-separated list of "host:port" pairs of the masters + */ + public AsyncKuduClientBuilder(String masterAddresses) { + this.masterAddresses = NetUtil.parseStrings(masterAddresses, DEFAULT_MASTER_PORT); + } + + /** + * Creates a new builder for a client that will connect to the specified masters. + * + *

+ * <p>Here are some examples of recognized formats:
+ * <ul>
+ *   <li>example.com</li>
+ *   <li>example.com:80</li>
+ *   <li>192.0.2.1</li>
+ *   <li>192.0.2.1:80</li>
+ *   <li>[2001:db8::1]</li>
+ *   <li>[2001:db8::1]:80</li>
+ *   <li>2001:db8::1</li>
+ * </ul>
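+ * <p>
+ * For illustration (the addresses are placeholders), the list form can be used as:
+ * <pre>{@code
+ * List<String> masters = Arrays.asList("example.com:7051", "[2001:db8::1]:7051");
+ * AsyncKuduClientBuilder builder = new AsyncKuduClientBuilder(masters);
+ * }</pre>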
+ * + * @param masterAddresses list of master addresses + */ + public AsyncKuduClientBuilder(List masterAddresses) { + this.masterAddresses = Lists.newArrayListWithCapacity(masterAddresses.size()); + for (String address : masterAddresses) { + this.masterAddresses.add( + NetUtil.parseString(address, DEFAULT_MASTER_PORT)); + } + } + + /** + * Sets the default timeout used for administrative operations (e.g. createTable, deleteTable, + * etc). + * Optional. + * If not provided, defaults to 30s. + * A value of 0 disables the timeout. + * @param timeoutMs a timeout in milliseconds + * @return this builder + */ + public AsyncKuduClientBuilder defaultAdminOperationTimeoutMs(long timeoutMs) { + this.defaultAdminOperationTimeoutMs = timeoutMs; + return this; + } + + /** + * Sets the default timeout used for user operations (using sessions and scanners). + * Optional. + * If not provided, defaults to 30s. + * A value of 0 disables the timeout. + * @param timeoutMs a timeout in milliseconds + * @return this builder + */ + public AsyncKuduClientBuilder defaultOperationTimeoutMs(long timeoutMs) { + this.defaultOperationTimeoutMs = timeoutMs; + return this; + } + + /** + * Sets the default timeout used for connection negotiation. + * Optional. + * If not provided, defaults to 10s. + * @param timeoutMs a timeout in milliseconds + * @return this builder + */ + public AsyncKuduClientBuilder connectionNegotiationTimeoutMs(long timeoutMs) { + this.defaultNegotiationTimeoutMs = timeoutMs; + return this; + } + + /** + * Socket read timeouts are no longer used in the Java client and have no effect. + * Setting this has no effect. + * @param timeoutMs a timeout in milliseconds + * @return this builder + * @deprecated this option no longer has any effect + */ + @Deprecated public AsyncKuduClientBuilder defaultSocketReadTimeoutMs(long timeoutMs) { + LOG.info("defaultSocketReadTimeoutMs is deprecated"); + return this; + } + + /** + * @deprecated the bossExecutor is no longer used and will have no effect if provided + */ + @Deprecated + public AsyncKuduClientBuilder nioExecutors(Executor bossExecutor, Executor workerExecutor) { + this.workerExecutor = workerExecutor; + return this; + } + + /** + * Set the executor which will be used for the embedded Netty workers. + * + * Optional. + * If not provided, uses a simple cached threadpool. If workerExecutor is null, + * then such a thread pool will be used. + * Note: executor's max thread number must be greater or equal to corresponding + * worker count, or netty cannot start enough threads, and client will get stuck. + * If not sure, please just use CachedThreadPool. + */ + public AsyncKuduClientBuilder nioExecutor(Executor workerExecutor) { + this.workerExecutor = workerExecutor; + return this; + } + + /** + * @deprecated the bossExecutor is no longer used and will have no effect if provided + */ + @Deprecated + public AsyncKuduClientBuilder bossCount(int bossCount) { + LOG.info("bossCount is deprecated"); + return this; + } + + /** + * Set the maximum number of Netty worker threads. + * A worker thread performs non-blocking read and write for one or more + * Netty Channels in a non-blocking mode. + * + * Optional. + * If not provided, (2 * the number of available processors) is used. If + * this client instance will be used on a machine running many client + * instances, it may be wise to lower this count, for example to avoid + * resource limits, at the possible cost of some performance of this client + * instance. 
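+ * <p>
+ * For example, a host running many client instances might lower the pool size
+ * (the value below is only illustrative):
+ * <pre>{@code
+ * builder.workerCount(2);
+ * }</pre>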
+ */ + public AsyncKuduClientBuilder workerCount(int workerCount) { + Preconditions.checkArgument(workerCount > 0, "workerCount should be greater than 0"); + this.workerCount = workerCount; + return this; + } + + /** + * Require authentication for the connection to a remote server. + * + * If it's set to true, the client will require mutual authentication between + * the server and the client. If the server doesn't support authentication, + * or it's disabled, the client will fail to connect. + */ + public AsyncKuduClientBuilder requireAuthentication(boolean requireAuthentication) { + this.requireAuthentication = requireAuthentication; + return this; + } + + /** + * Require encryption for the connection to a remote server. + * + * If it's set to REQUIRED_REMOTE or REQUIRED, the client will + * require encrypting the traffic between the server and the client. + * If the server doesn't support encryption, or if it's disabled, the + * client will fail to connect. + * + * Loopback connections are encrypted only if 'encryption_policy' is + * set to REQUIRED, or if it's required by the server. + * + * The default value is OPTIONAL, which allows connecting to servers without + * encryption as well, but it will still attempt to use it if the server + * supports it. + */ + public AsyncKuduClientBuilder encryptionPolicy(EncryptionPolicy encryptionPolicy) { + this.encryptionPolicy = encryptionPolicy; + return this; + } + + /** + * Creates the client bootstrap for Netty. The user can specify the executor, but + * if they don't, we'll use a simple thread pool. + */ + private Bootstrap createBootstrap() { + Executor worker = workerExecutor; + if (worker == null) { + worker = Executors.newCachedThreadPool( + new ThreadFactoryBuilder() + .setNameFormat("kudu-nio-%d") + .setDaemon(true) + .build()); + } + EventLoopGroup workerGroup = new NioEventLoopGroup(workerCount, worker); + Bootstrap b = new Bootstrap(); + b.group(workerGroup); + b.channel(NioSocketChannel.class); + b.option(ChannelOption.CONNECT_TIMEOUT_MILLIS, 60000); + b.option(ChannelOption.TCP_NODELAY, true); + // Unfortunately there is no way to override the keep-alive timeout in + // Java since the JRE doesn't expose any way to call setsockopt() with + // TCP_KEEPIDLE. And of course the default timeout is >2h. Sigh. + b.option(ChannelOption.SO_KEEPALIVE, true); + b.option(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT); + return b; + } + + /** + * Disable this client's collection of statistics. + * Statistics are enabled by default. + * @return this builder + */ + public AsyncKuduClientBuilder disableStatistics() { + this.statisticsDisabled = true; + return this; + } + + /** + * Set the SASL protocol name. + * SASL protocol name is used when connecting to a secure (Kerberos-enabled) + * cluster. It must match the servers' service principal name (SPN). + * + * Optional. + * If not provided, it will use the default SASL protocol name ("kudu"). + * @return this builder + */ + public AsyncKuduClientBuilder saslProtocolName(String saslProtocolName) { + this.saslProtocolName = saslProtocolName; + return this; + } + + /** + * Creates a new client that connects to the masters. + * Doesn't block and won't throw an exception if the masters don't exist. 
+ * @return a new asynchronous Kudu client + */ + public AsyncKuduClient build() { + return new AsyncKuduClient(this); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java new file mode 100644 index 0000000000..31a2cdf75f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java @@ -0,0 +1,1416 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.apache.kudu.client; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.kudu.tserver.Tserver.NewScanRequestPB; +import static org.apache.kudu.tserver.Tserver.ResourceMetricsPB; +import static org.apache.kudu.tserver.Tserver.ScanRequestPB; +import static org.apache.kudu.tserver.Tserver.ScanResponsePB; +import static org.apache.kudu.tserver.Tserver.TabletServerErrorPB; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import com.google.protobuf.Message; +import com.google.protobuf.UnsafeByteOperations; +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import io.netty.util.HashedWheelTimer; +import io.netty.util.Timeout; +import io.netty.util.TimerTask; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.security.Token; +import org.apache.kudu.tserver.Tserver; +import org.apache.kudu.tserver.Tserver.ScannerKeepAliveRequestPB; +import org.apache.kudu.tserver.Tserver.ScannerKeepAliveResponsePB; +import org.apache.kudu.util.Pair; + +/** + * Creates a scanner to read data from Kudu. + *

+ * <p>
+ * This class is not synchronized as it's expected to be
+ * used from a single thread at a time. It's rarely (if ever?) useful to
+ * scan concurrently from a shared scanner using multiple threads. If you
+ * want to optimize large table scans using extra parallelism, create a few
+ * scanners through the {@link KuduScanToken} API. Or use MapReduce.
+ * <p>
+ * There's no method in this class to explicitly open the scanner. It will open
+ * itself automatically when you start scanning by calling {@link #nextRows()}.
+ * Also, the scanner will automatically call {@link #close} when it reaches the
+ * end key. If, however, you would like to stop scanning before reaching the
+ * end key, you must call {@link #close} before disposing of the scanner.
+ * Note that it's always safe to call {@link #close} on a scanner.
+ * <p>
+ * An {@code AsyncKuduScanner} is not re-usable. Should you want to scan the same rows
+ * or the same table again, you must create a new one.
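+ * <p>
+ * As a rough sketch of typical usage (the {@code client} and {@code table} variables
+ * are placeholders, and error handling is elided):
+ * <pre>{@code
+ * AsyncKuduScanner scanner = client.newScannerBuilder(table).build();
+ * while (scanner.hasMoreRows()) {
+ *   RowResultIterator rows = scanner.nextRows().join();
+ *   if (rows == null) {
+ *     break; // the scanner finished scanning
+ *   }
+ *   for (RowResult row : rows) {
+ *     // process each row
+ *   }
+ * }
+ * scanner.close().join();
+ * }</pre>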

+ *
+ * <h1>A note on passing {@code byte} arrays as arguments</h1>
+ * None of the methods that receive a {@code byte[]} argument will copy it.
+ * For more info, please refer to the documentation of {@link KuduRpc}.
+ * <h1>A note on passing {@code String}s as arguments</h1>
+ * All strings are assumed to use the platform's default charset. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class AsyncKuduScanner { + + private static final Logger LOG = LoggerFactory.getLogger(AsyncKuduScanner.class); + + /** + * The possible read modes for scanners. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public enum ReadMode { + /** + * When READ_LATEST is specified the server will always return committed writes at + * the time the request was received. This type of read does not return a snapshot + * timestamp and is not repeatable. + * + * In ACID terms this corresponds to Isolation mode: "Read Committed" + * + * This is the default mode. + */ + READ_LATEST(Common.ReadMode.READ_LATEST), + + /** + * When READ_AT_SNAPSHOT is specified the server will attempt to perform a read + * at the provided timestamp. If no timestamp is provided the server will take the + * current time as the snapshot timestamp. In this mode reads are repeatable, i.e. + * all future reads at the same timestamp will yield the same data. This is + * performed at the expense of waiting for in-flight transactions whose timestamp + * is lower than the snapshot's timestamp to complete, so it might incur a latency + * penalty. + * + * In ACID terms this, by itself, corresponds to Isolation mode "Repeatable + * Read". If all writes to the scanned tablet are made externally consistent, + * then this corresponds to Isolation mode "Strict-Serializable". + * + * Note: there currently "holes", which happen in rare edge conditions, by which writes + * are sometimes not externally consistent even when action was taken to make them so. + * In these cases Isolation may degenerate to mode "Read Committed". See KUDU-430. + */ + READ_AT_SNAPSHOT(Common.ReadMode.READ_AT_SNAPSHOT), + + /** + * When @c READ_YOUR_WRITES is specified, the client will perform a read + * such that it follows all previously known writes and reads from this client. + * Specifically this mode: + * (1) ensures read-your-writes and read-your-reads session guarantees, + * (2) minimizes latency caused by waiting for outstanding write + * transactions to complete. + * + * Reads in this mode are not repeatable: two READ_YOUR_WRITES reads, even if + * they provide the same propagated timestamp bound, can execute at different + * timestamps and thus may return different results. + */ + READ_YOUR_WRITES(Common.ReadMode.READ_YOUR_WRITES); + + private final Common.ReadMode pbVersion; + ReadMode(Common.ReadMode pbVersion) { + this.pbVersion = pbVersion; + } + + @InterfaceAudience.Private + public Common.ReadMode pbVersion() { + return this.pbVersion; + } + } + + /** + * Expected row data format in scanner result set. + * + * The server may or may not support the expected layout, and the actual layout is internal + * hidden by {@link RowResult} and {@link RowResultIterator} interfaces so it's transparent to + * application code. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public enum RowDataFormat { + /** + * Server is expected to return scanner result data in row-wise layout. + * This is currently the default layout. + */ + ROWWISE, + + /** + * Server is expected to return scanner result data in columnar layout. + * This layout is more efficient in processing and bandwidth for both server and client side. + * It requires server support (kudu-1.12.0 and later), if it's not supported server still + * returns data in row-wise layout. 
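+ * <p>
+ * For illustration (scanner construction elided), the columnar layout can be
+ * requested with:
+ * <pre>{@code
+ * scanner.setRowDataFormat(AsyncKuduScanner.RowDataFormat.COLUMNAR);
+ * }</pre>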
+ */ + COLUMNAR, + } + + // This is private because it is not safe to use this column name as it may be + // different in the case of collisions. Instead the `IS_DELETED` column should + // be looked up by type. + static final String DEFAULT_IS_DELETED_COL_NAME = "is_deleted"; + + ////////////////////////// + // Initial configurations. + ////////////////////////// + + private final AsyncKuduClient client; + private final KuduTable table; + private final Schema schema; + + private final PartitionPruner pruner; + + /** + * Map of column name to predicate. + */ + private final Map predicates; + + /** + * Maximum number of bytes returned by the scanner, on each batch. + */ + private final int batchSizeBytes; + + /** + * The maximum number of rows to scan. + */ + private final long limit; + + /** + * Set in the builder. If it's not set by the user, it will default to EMPTY_ARRAY. + * It is then reset to the new start primary key of each tablet we open a scanner on as the scan + * moves from one tablet to the next. + */ + private final byte[] startPrimaryKey; + + /** + * Set in the builder. If it's not set by the user, it will default to EMPTY_ARRAY. + * It's never modified after that. + */ + private final byte[] endPrimaryKey; + + private byte[] lastPrimaryKey; + + private final boolean prefetching; + + private final boolean cacheBlocks; + + private final ReadMode readMode; + + private final Common.OrderMode orderMode; + + private final boolean isFaultTolerant; + + private final long startTimestamp; + + private long htTimestamp; + + private long lowerBoundPropagationTimestamp = AsyncKuduClient.NO_TIMESTAMP; + + private final ReplicaSelection replicaSelection; + + private final long keepAlivePeriodMs; + + ///////////////////// + // Runtime variables. + ///////////////////// + + private boolean reuseRowResult = false; + + private final ResourceMetrics resourceMetrics = new ResourceMetrics(); + + private boolean closed = false; + + private boolean canRequestMore = true; + + private long numRowsReturned = 0; + + private RowDataFormat rowDataFormat = RowDataFormat.ROWWISE; + + /** + * The tabletSlice currently being scanned. + * If null, we haven't started scanning. + * If == DONE, then we're done scanning. + * Otherwise it contains a proper tabletSlice name, and we're currently scanning. + */ + private RemoteTablet tablet; + + /** + * This is the scanner ID we got from the TabletServer. + * It's generated randomly so any value is possible. + */ + private byte[] scannerId; + + /** + * The sequence ID of this call. The sequence ID should start at 0 + * with the request for a new scanner, and after each successful request, + * the client should increment it by 1. When retrying a request, the client + * should _not_ increment this value. If the server detects that the client + * missed a chunk of rows from the middle of a scan, it will respond with an + * error. + */ + private int sequenceId; + + final long scanRequestTimeout; + + private String queryId; + + private Timeout keepAliveTimeout; + + /** + * UUID of the tserver which the scanner is bound with. The following scans of + * this scanner will be sent to the tserver. + */ + private String tsUUID; + + /** + * The prefetching result is cached in memory. This atomic reference is used to avoid + * two concurrent prefetchings occur and the latest one overrides the previous one. 
+ */ + private AtomicReference> cachedPrefetcherDeferred = + new AtomicReference<>(); + + /** + * When scanner's prefetching is enabled, there are at most two concurrent ScanRequests + * sent to the tserver. But if the scan data reached the end, only one hasMore=false is returned. + * As a result, one of the ScanRequests got "scanner not found (it may have expired)" exception. + * The same issue occurs for KeepAliveRequest. + * + * @param errorCode error code returned from tserver + * @return true if this can be ignored + */ + boolean canBeIgnored(TabletServerErrorPB.Code errorCode) { + return errorCode == TabletServerErrorPB.Code.SCANNER_EXPIRED && + prefetching && closed; + } + + AsyncKuduScanner(AsyncKuduClient client, KuduTable table, List projectedNames, + List projectedIndexes, ReadMode readMode, boolean isFaultTolerant, + long scanRequestTimeout, + Map predicates, long limit, + boolean cacheBlocks, boolean prefetching, + byte[] startPrimaryKey, byte[] endPrimaryKey, + long startTimestamp, long htTimestamp, + int batchSizeBytes, PartitionPruner pruner, + ReplicaSelection replicaSelection, long keepAlivePeriodMs) { + checkArgument(batchSizeBytes >= 0, "Need non-negative number of bytes, " + + "got %s", batchSizeBytes); + checkArgument(limit > 0, "Need a strictly positive number for the limit, " + + "got %s", limit); + if (htTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + checkArgument(htTimestamp >= 0, "Need non-negative number for the scan, " + + " timestamp got %s", htTimestamp); + checkArgument(readMode == ReadMode.READ_AT_SNAPSHOT, "When specifying a " + + "HybridClock timestamp, the read mode needs to be set to READ_AT_SNAPSHOT"); + } + if (startTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + checkArgument(htTimestamp >= 0, "Must have both start and end timestamps " + + "for a diff scan"); + checkArgument(startTimestamp <= htTimestamp, "Start timestamp must be less " + + "than or equal to end timestamp"); + } + + this.isFaultTolerant = isFaultTolerant; + if (this.isFaultTolerant) { + checkArgument(readMode == ReadMode.READ_AT_SNAPSHOT, "Use of fault tolerance scanner " + + "requires the read mode to be set to READ_AT_SNAPSHOT"); + this.orderMode = Common.OrderMode.ORDERED; + } else { + this.orderMode = Common.OrderMode.UNORDERED; + } + + this.client = client; + this.table = table; + this.pruner = pruner; + this.readMode = readMode; + this.scanRequestTimeout = scanRequestTimeout; + this.predicates = predicates; + this.limit = limit; + this.cacheBlocks = cacheBlocks; + this.prefetching = prefetching; + this.startPrimaryKey = startPrimaryKey; + this.endPrimaryKey = endPrimaryKey; + this.startTimestamp = startTimestamp; + this.htTimestamp = htTimestamp; + this.batchSizeBytes = batchSizeBytes; + this.lastPrimaryKey = AsyncKuduClient.EMPTY_ARRAY; + + // Map the column names to actual columns in the table schema. + // If the user set this to 'null', we scan all columns. + List columns = new ArrayList<>(); + if (projectedNames != null) { + for (String columnName : projectedNames) { + ColumnSchema originalColumn = table.getSchema().getColumn(columnName); + columns.add(getStrippedColumnSchema(originalColumn)); + } + } else if (projectedIndexes != null) { + for (Integer columnIndex : projectedIndexes) { + ColumnSchema originalColumn = table.getSchema().getColumnByIndex(columnIndex); + columns.add(getStrippedColumnSchema(originalColumn)); + } + } else { + // By default, a scanner is created with all columns including auto-incrementing + // column if projected columns are not specified. 
+ columns.addAll(table.getSchema().getColumns()); + } + // This is a diff scan so add the IS_DELETED column. + if (startTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + columns.add(generateIsDeletedColumn(table.getSchema())); + } + this.schema = new Schema(columns); + + // If the partition pruner has pruned all partitions, then the scan can be + // short circuited without contacting any tablet servers. + if (!pruner.hasMorePartitionKeyRanges()) { + LOG.debug("Short circuiting scan"); + this.canRequestMore = false; + this.closed = true; + } + + this.replicaSelection = replicaSelection; + this.keepAlivePeriodMs = keepAlivePeriodMs; + + // For READ_YOUR_WRITES scan mode, get the latest observed timestamp + // and store it. Always use this one as the propagated timestamp for + // the duration of the scan to avoid unnecessary wait. + if (readMode == ReadMode.READ_YOUR_WRITES) { + this.lowerBoundPropagationTimestamp = this.client.getLastPropagatedTimestamp(); + } + } + + AsyncKuduScanner(AsyncKuduClient client, KuduTable table, List projectedNames, + List projectedIndexes, ReadMode readMode, boolean isFaultTolerant, + long scanRequestTimeout, + Map predicates, long limit, + boolean cacheBlocks, boolean prefetching, + byte[] startPrimaryKey, byte[] endPrimaryKey, + long startTimestamp, long htTimestamp, + int batchSizeBytes, PartitionPruner pruner, + ReplicaSelection replicaSelection, long keepAlivePeriodMs, String queryId) { + this( + client, table, projectedNames, projectedIndexes, readMode, isFaultTolerant, + scanRequestTimeout, predicates, limit, cacheBlocks, prefetching, startPrimaryKey, + endPrimaryKey, startTimestamp, htTimestamp, batchSizeBytes, + pruner, replicaSelection, keepAlivePeriodMs); + if (queryId.isEmpty()) { + this.queryId = UUID.randomUUID().toString().replace("-", ""); + } else { + this.queryId = queryId; + } + } + + /** + * Generates and returns a ColumnSchema for the virtual IS_DELETED column. + * The column name is generated to ensure there is never a collision. + * + * @param schema the table schema + * @return a ColumnSchema for the virtual IS_DELETED column + */ + private static ColumnSchema generateIsDeletedColumn(Schema schema) { + StringBuilder columnName = new StringBuilder(DEFAULT_IS_DELETED_COL_NAME); + // If the column already exists and we need to pick an alternate column name. + while (schema.hasColumn(columnName.toString())) { + columnName.append("_"); + } + return new ColumnSchema.ColumnSchemaBuilder(columnName.toString(), Type.BOOL) + .wireType(Common.DataType.IS_DELETED) + .defaultValue(false) + .nullable(false) + .key(false) + .build(); + } + + /** + * Sets isKey to false on the passed ColumnSchema. + * This allows out of order key columns in projections. + * + * TODO: Remove the need for this by handling server side. + * + * @return a new column schema + */ + private static ColumnSchema getStrippedColumnSchema(ColumnSchema columnToClone) { + return new ColumnSchema.ColumnSchemaBuilder(columnToClone) + .key(false) + .build(); + } + + /** + * Returns the maximum number of rows that this scanner was configured to return. + * @return a long representing the maximum number of rows that can be returned + */ + public long getLimit() { + return this.limit; + } + + /** + * Tells if there is data to scan, including both rpc or cached rpc result. 
+ * @return true if there might be more data to scan, else false + */ + public boolean hasMoreRows() { + boolean hasMore = this.canRequestMore || cachedPrefetcherDeferred.get() != null; + if (!hasMore) { + stopKeepAlivePeriodically(); + } + return hasMore; + } + + /** + * Returns if this scanner was configured to cache data blocks or not. + * @return true if this scanner will cache blocks, else else. + */ + public boolean getCacheBlocks() { + return this.cacheBlocks; + } + + /** + * Returns the maximum number of bytes returned by the scanner, on each batch. + * @return a long representing the maximum number of bytes that a scanner can receive at once + * from a tablet server + */ + public long getBatchSizeBytes() { + return this.batchSizeBytes; + } + + /** + * Returns the ReadMode for this scanner. + * @return the configured read mode for this scanner + */ + public ReadMode getReadMode() { + return this.readMode; + } + + private Common.OrderMode getOrderMode() { + return this.orderMode; + } + + /** + * Returns the scan request timeout for this scanner. + * @return the scan request timeout, in milliseconds + */ + public long getScanRequestTimeout() { + return scanRequestTimeout; + } + + /** + * Returns the projection schema of this scanner. If specific columns were + * not specified during scanner creation, the table schema is returned. + * @return the projection schema for this scanner + */ + public Schema getProjectionSchema() { + return this.schema; + } + + public long getKeepAlivePeriodMs() { + return keepAlivePeriodMs; + } + + long getStartSnapshotTimestamp() { + return this.startTimestamp; + } + + /** + * Returns the {@code ResourceMetrics} for this scanner. These metrics are + * updated with each batch of rows returned from the server. + * @return the resource metrics for this scanner + */ + public ResourceMetrics getResourceMetrics() { + return this.resourceMetrics; + } + + long getSnapshotTimestamp() { + return this.htTimestamp; + } + + /** + * If set to true, the {@link RowResult} object returned by the {@link RowResultIterator} + * will be reused with each call to {@link RowResultIterator#next()}. + * This can be a useful optimization to reduce the number of objects created. + * + * Note: DO NOT use this if the RowResult is stored between calls to next(). + * Enabling this optimization means that a call to next() mutates the previously returned + * RowResult. Accessing the previously returned RowResult after a call to next(), by storing all + * RowResults in a collection and accessing them later for example, will lead to all of the + * stored RowResults being mutated as per the data in the last RowResult returned. + */ + public void setReuseRowResult(boolean reuseRowResult) { + this.reuseRowResult = reuseRowResult; + } + + /** + * Optionally set expected row data format. + * + * @param rowDataFormat Row data format to be expected. + */ + public void setRowDataFormat(RowDataFormat rowDataFormat) { + this.rowDataFormat = rowDataFormat; + } + + public String getTsUUID() { + return tsUUID; + } + + /** + * Scans a number of rows. + *

+ * Once this method returns {@code null} once (which indicates that this + * {@code Scanner} is done scanning), calling it again leads to an undefined + * behavior. + * @return a deferred list of rows. + */ + public Deferred nextRows() { + if (closed) { // We're already done scanning. + if (prefetching && cachedPrefetcherDeferred.get() != null) { + // return the cached result and reset the cache. + return cachedPrefetcherDeferred.getAndUpdate((v) -> null); + } + return Deferred.fromResult(null); + } else if (tablet == null) { + Callback, AsyncKuduScanner.Response> cb = + new Callback, Response>() { + @Override + public Deferred call(Response resp) throws Exception { + if (htTimestamp == AsyncKuduClient.NO_TIMESTAMP && + resp.scanTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + // If the server-assigned timestamp is present in the tablet + // server's response, store it in the scanner. The stored value + // is used for read operations in READ_AT_SNAPSHOT mode at + // other tablet servers in the context of the same scan. + htTimestamp = resp.scanTimestamp; + } + + long lastPropagatedTimestamp = AsyncKuduClient.NO_TIMESTAMP; + if (readMode == ReadMode.READ_YOUR_WRITES && + resp.scanTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + // For READ_YOUR_WRITES mode, update the latest propagated timestamp + // with the chosen snapshot timestamp sent back from the server, to + // avoid unnecessarily wait for subsequent reads. Since as long as + // the chosen snapshot timestamp of the next read is greater than + // the previous one, the scan does not violate READ_YOUR_WRITES + // session guarantees. + lastPropagatedTimestamp = resp.scanTimestamp; + } else if (resp.propagatedTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + // Otherwise we just use the propagated timestamp returned from + // the server as the latest propagated timestamp. + lastPropagatedTimestamp = resp.propagatedTimestamp; + } + if (lastPropagatedTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + client.updateLastPropagatedTimestamp(lastPropagatedTimestamp); + } + + if (isFaultTolerant && resp.lastPrimaryKey != null) { + lastPrimaryKey = resp.lastPrimaryKey; + } + + numRowsReturned += resp.data.getNumRows(); + if (resp.resourceMetricsPb != null) { + resourceMetrics.update(resp.resourceMetricsPb); + } + + if (!resp.more || resp.scannerId == null) { + tsUUID = resp.data.getTsUUID(); + scanFinished(); + return Deferred.fromResult(resp.data); // there might be data to return + } + scannerId = resp.scannerId; + sequenceId++; + canRequestMore = resp.more; + tsUUID = resp.data.getTsUUID(); + if (LOG.isDebugEnabled()) { + LOG.debug("Scanner {} opened on {}", Bytes.pretty(scannerId), tablet); + } + return Deferred.fromResult(resp.data); + } + + @Override + public String toString() { + return "scanner opened"; + } + }; + + Callback, Exception> eb = + new Callback, Exception>() { + @Override + public Deferred call(Exception e) throws Exception { + invalidate(); + if (e instanceof NonCoveredRangeException) { + NonCoveredRangeException ncre = (NonCoveredRangeException) e; + pruner.removePartitionKeyRange(ncre.getNonCoveredRangeEnd()); + + // Stop scanning if the non-covered range is past the end partition key. 
+ if (!pruner.hasMorePartitionKeyRanges()) { + canRequestMore = false; + closed = true; // the scanner is closed on the other side at this point + return Deferred.fromResult(RowResultIterator.empty()); + } + scannerId = null; + sequenceId = 0; + return nextRows(); + } else { + LOG.debug("Can not open scanner", e); + // Don't let the scanner think it's opened on this tablet. + return Deferred.fromError(e); // Let the error propagate. + } + } + + @Override + public String toString() { + return "open scanner errback"; + } + }; + + // We need to open the scanner first. + return client.sendRpcToTablet(getOpenRequest()).addCallbackDeferring(cb).addErrback(eb); + } else if (prefetching && cachedPrefetcherDeferred.get() != null) { + Deferred prefetcherDeferred = + cachedPrefetcherDeferred.getAndUpdate((v) -> null); + prefetcherDeferred.chain(new Deferred().addCallback(prefetch)); + return prefetcherDeferred; + } + final Deferred d = + client.scanNextRows(this).addCallbacks(gotNextRow, nextRowErrback()); + if (prefetching) { + d.chain(new Deferred().addCallback(prefetch)); + } + return d; + } + + private final Callback prefetch = + new Callback() { + @Override + public RowResultIterator call(RowResultIterator arg) throws Exception { + if (canRequestMore) { + if (cachedPrefetcherDeferred.get() == null) { + Deferred prefetcherDeferred = + client.scanNextRows(AsyncKuduScanner.this) + .addCallbacks(gotNextRow, nextRowErrback()); + if (!cachedPrefetcherDeferred.compareAndSet(null, prefetcherDeferred)) { + LOG.info("Skip one prefetching because two concurrent prefetching scan occurs"); + } + } + } + return null; + } + }; + + /** + * Singleton callback to handle responses of "next" RPCs. + * This returns an {@code ArrayList>} (possibly inside a + * deferred one). + */ + private final Callback gotNextRow = + new Callback() { + @Override + public RowResultIterator call(final Response resp) { + long lastPropagatedTimestamp = AsyncKuduClient.NO_TIMESTAMP; + if (readMode == ReadMode.READ_YOUR_WRITES && + resp.scanTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + // For READ_YOUR_WRITES mode, update the latest propagated timestamp + // with the chosen snapshot timestamp sent back from the server, to + // avoid unnecessarily wait for subsequent reads. Since as long as + // the chosen snapshot timestamp of the next read is greater than + // the previous one, the scan does not violate READ_YOUR_WRITES + // session guarantees. + lastPropagatedTimestamp = resp.scanTimestamp; + } else if (resp.propagatedTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + // Otherwise we just use the propagated timestamp returned from + // the server as the latest propagated timestamp. + lastPropagatedTimestamp = resp.propagatedTimestamp; + } + if (lastPropagatedTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + client.updateLastPropagatedTimestamp(lastPropagatedTimestamp); + } + numRowsReturned += resp.data.getNumRows(); + if (isFaultTolerant && resp.lastPrimaryKey != null) { + lastPrimaryKey = resp.lastPrimaryKey; + } + if (resp.resourceMetricsPb != null) { + resourceMetrics.update(resp.resourceMetricsPb); + } + if (!resp.more) { // We're done scanning this tablet. + scanFinished(); + return resp.data; + } + sequenceId++; + canRequestMore = resp.more; + return resp.data; + } + + @Override + public String toString() { + return "get nextRows response"; + } + }; + + /** + * Creates a new errback to handle errors while trying to get more rows. 
+ */ + private final Callback, Exception> nextRowErrback() { + return new Callback, Exception>() { + @Override + public Deferred call(Exception e) throws Exception { + final RemoteTablet old_tablet = tablet; // Save before invalidate(). + invalidate(); // If there was an error, don't assume we're still OK. + // If encountered FaultTolerantScannerExpiredException, it means the + // fault tolerant scanner on the server side expired. Therefore, open + // a new scanner. + if (e instanceof FaultTolerantScannerExpiredException) { + scannerId = null; + sequenceId = 0; + LOG.warn("Scanner expired, creating a new one {}", AsyncKuduScanner.this); + return nextRows(); + } else { + LOG.warn("{} pretends to not know {}", old_tablet, AsyncKuduScanner.this, e); + return Deferred.fromError(e); // Let the error propagate. + } + } + + @Override + public String toString() { + return "NextRow errback"; + } + }; + } + + void scanFinished() { + Partition partition = tablet.getPartition(); + pruner.removePartitionKeyRange(partition.getPartitionKeyEnd()); + // Stop scanning if we have scanned until or past the end partition key, or + // if we have fulfilled the limit. + if (!pruner.hasMorePartitionKeyRanges() || numRowsReturned >= limit) { + canRequestMore = false; + closed = true; // the scanner is closed on the other side at this point + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Done scanning tablet {} for partition {} with scanner id {}", + tablet.getTabletId(), tablet.getPartition(), Bytes.pretty(scannerId)); + } + scannerId = null; + sequenceId = 0; + lastPrimaryKey = AsyncKuduClient.EMPTY_ARRAY; + invalidate(); + } + + /** + * @return true if the scanner has been closed. + */ + public boolean isClosed() { + return closed; + } + + /** + * Closes this scanner (don't forget to call this when you're done with it!). + *

+ * Closing a scanner already closed has no effect. The deferred returned + * will be called back immediately. + * @return A deferred object that indicates the completion of the request. + * The {@link Object} can be null, a RowResultIterator if there was data left + * in the scanner, or an Exception. + */ + public Deferred close() { + if (closed) { + return Deferred.fromResult(null); + } + stopKeepAlivePeriodically(); + return client.closeScanner(this).addCallback(closedCallback()); // TODO errBack ? + } + + /** Callback+Errback invoked when the TabletServer closed our scanner. */ + private Callback closedCallback() { + return new Callback() { + @Override + public RowResultIterator call(Response response) { + closed = true; + if (LOG.isDebugEnabled()) { + LOG.debug("Scanner {} closed on {}", Bytes.pretty(scannerId), tablet); + } + invalidate(); + scannerId = "client debug closed".getBytes(UTF_8); // Make debugging easier. + return response == null ? null : response.data; + } + + @Override + public String toString() { + return "scanner closed"; + } + }; + } + + @Override + public String toString() { + final String tablet = this.tablet == null ? "null" : this.tablet.getTabletId(); + final StringBuilder buf = new StringBuilder(); + buf.append("KuduScanner(table="); + buf.append(table.getName()); + buf.append(", tablet=").append(tablet); + buf.append(", scannerId=").append(Bytes.pretty(scannerId)); + buf.append(", scanRequestTimeout=").append(scanRequestTimeout); + if (startPrimaryKey.length > 0) { + buf.append(", startPrimaryKey=").append(Bytes.hex(startPrimaryKey)); + } else { + buf.append(", startPrimaryKey="); + } + if (endPrimaryKey.length > 0) { + buf.append(", endPrimaryKey=").append(Bytes.hex(endPrimaryKey)); + } else { + buf.append(", endPrimaryKey="); + } + if (lastPrimaryKey.length > 0) { + buf.append(", lastPrimaryKey=").append(Bytes.hex(lastPrimaryKey)); + } else { + buf.append(", lastPrimaryKey="); + } + buf.append(')'); + return buf.toString(); + } + + // ---------------------- // + // Package private stuff. // + // ---------------------- // + + KuduTable table() { + return table; + } + + /** + * Invalidates this scanner and makes it assume it's no longer opened. + * When a TabletServer goes away while we're scanning it, or some other type + * of access problem happens, this method should be called so that the + * scanner will have to re-locate the TabletServer and re-open itself. + */ + void invalidate() { + tablet = null; + } + + /** + * Returns the tabletSlice currently being scanned, if any. + */ + RemoteTablet currentTablet() { + return tablet; + } + + /** + * Gets the replica selection mechanism being used. + * + * @return the replica selection mechanism. + */ + ReplicaSelection getReplicaSelection() { + return replicaSelection; + } + + /** + * Returns an RPC to open this scanner. + */ + KuduRpc getOpenRequest() { + checkScanningNotStarted(); + return new ScanRequest(table, State.OPENING, tablet); + } + + /** + * Keep the current remote scanner alive. + *

+ * <p>
+ * Keep the current remote scanner alive on the Tablet server for an
+ * additional time-to-live. This is useful if the interval in between
+ * nextRows() calls is big enough that the remote scanner might be garbage
+ * collected. The scanner time-to-live can be configured on the tablet
+ * server via the --scanner_ttl_ms configuration flag and has a default
+ * of 60 seconds.
+ * <p>
+ * This does not invalidate any previously fetched results.
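+ * <p>
+ * As an illustrative sketch (timing and error handling elided), a caller that
+ * processes batches slowly might refresh the remote scanner between batches:
+ * <pre>{@code
+ * scanner.keepAlive().join();
+ * }</pre>
+ * <p>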

+ * Note that an error returned by this method should not be taken as indication + * that the scan has failed. Subsequent calls to nextRows() might still be successful, + * particularly if the scanner is configured to be fault tolerant. + * @return A deferred object that indicates the completion of the request. + * @throws IllegalStateException if the scanner is already closed. + */ + public Deferred keepAlive() { + if (closed) { + if (prefetching && cachedPrefetcherDeferred.get() != null) { + // skip sending keep alive if all of the data has been fetched in prefetching mode + return Deferred.fromResult(null); + } + throw new IllegalStateException("Scanner has already been closed"); + } + return client.keepAlive(this); + } + + /** + * Package-private access point for {@link AsyncKuduScanner}s to keep themselves + * alive on tablet servers by sending keep-alive requests periodically. + * @param keepAliveIntervalMS the interval of sending keep-alive requests. + * @return true if starting keep-alive timer successfully. + */ + boolean startKeepAlivePeriodically(int keepAliveIntervalMS) { + if (closed) { + return false; + } + final class KeepAliveTimer implements TimerTask { + @Override + public void run(final Timeout timeout) { + keepAlive(); + keepAliveTimeout = AsyncKuduClient.newTimeout(client.getTimer(), this, keepAliveIntervalMS); + } + } + + keepAliveTimeout = + AsyncKuduClient.newTimeout(client.getTimer(), new KeepAliveTimer(), keepAliveIntervalMS); + return true; + } + + /** + * Package-private access point for {@link AsyncKuduScanner}s to stop + * keep-alive timer. + * @return true if stopping keep-alive timer successfully. + */ + boolean stopKeepAlivePeriodically() { + if (keepAliveTimeout != null) { + return keepAliveTimeout.cancel(); + } + return true; + } + + /** + * Returns an RPC to fetch the next rows. + */ + KuduRpc getNextRowsRequest() { + return new ScanRequest(table, State.NEXT, tablet); + } + + /** + * Returns an RPC to close this scanner. + */ + KuduRpc getCloseRequest() { + return new ScanRequest(table, State.CLOSING, tablet); + } + + /** + * Returns an RPC to keep this scanner alive on the tablet server. + * @return a new {@link KeepAliveRequest} + */ + KuduRpc getKeepAliveRequest() { + return new KeepAliveRequest(table, tablet); + } + + /** + * Throws an exception if scanning already started. + * @throws IllegalStateException if scanning already started. + */ + private void checkScanningNotStarted() { + if (tablet != null) { + throw new IllegalStateException("scanning already started"); + } + } + + /** + * Helper object that contains all the info sent by a TS after a Scan request. + */ + static final class Response { + /** The ID associated with the scanner that issued the request. */ + private final byte[] scannerId; + /** The actual payload of the response. */ + private final RowResultIterator data; + + /** + * If false, the filter we use decided there was no more data to scan. + * In this case, the server has automatically closed the scanner for us, + * so we don't need to explicitly close it. + */ + private final boolean more; + + /** + * Server-assigned timestamp for the scan operation. It's used when + * the scan operates in READ_AT_SNAPSHOT mode and the timestamp is not + * specified explicitly. The field is set with the snapshot timestamp sent + * in the response from the very first tablet server contacted while + * fetching data from corresponding tablets. 
If the tablet server does not + * send the snapshot timestamp in its response, this field is assigned + * a special value AsyncKuduClient.NO_TIMESTAMP. + */ + private final long scanTimestamp; + + /** + * The server timestamp to propagate, if set. If the server response does + * not contain propagated timestamp, this field is set to special value + * AsyncKuduClient.NO_TIMESTAMP + */ + private final long propagatedTimestamp; + + private final byte[] lastPrimaryKey; + + private final ResourceMetricsPB resourceMetricsPb; + + Response(final byte[] scannerId, + final RowResultIterator data, + final boolean more, + final long scanTimestamp, + final long propagatedTimestamp, + final byte[] lastPrimaryKey, + final ResourceMetricsPB resourceMetricsPb) { + this.scannerId = scannerId; + this.data = data; + this.more = more; + this.scanTimestamp = scanTimestamp; + this.propagatedTimestamp = propagatedTimestamp; + this.lastPrimaryKey = lastPrimaryKey; + this.resourceMetricsPb = resourceMetricsPb; + } + + @Override + public String toString() { + String ret = "AsyncKuduScanner$Response(scannerId = " + Bytes.pretty(scannerId) + + ", data = " + data + ", more = " + more; + if (scanTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + ret += ", responseScanTimestamp = " + scanTimestamp; + } + ret += ")"; + return ret; + } + } + + private enum State { + OPENING, + NEXT, + CLOSING + } + + /** + * RPC sent out to keep a scanner alive on a TabletServer. + */ + final class KeepAliveRequest extends KuduRpc { + + KeepAliveRequest(KuduTable table, RemoteTablet tablet) { + super(table, client.getTimer(), scanRequestTimeout); + setTablet(tablet); + } + + @Override + String serviceName() { + return TABLET_SERVER_SERVICE_NAME; + } + + @Override + String method() { + return "ScannerKeepAlive"; + } + + @Override + ReplicaSelection getReplicaSelection() { + return replicaSelection; + } + + /** Serializes this request. */ + @Override + Message createRequestPB() { + final ScannerKeepAliveRequestPB.Builder builder = ScannerKeepAliveRequestPB.newBuilder(); + builder.setScannerId(UnsafeByteOperations.unsafeWrap(scannerId)); + return builder.build(); + } + + @Override + public byte[] partitionKey() { + // This key is used to lookup where the request needs to go + return pruner.nextPartitionKey(); + } + + @Override + Pair deserialize(final CallResponse callResponse, + String tsUUID) throws KuduException { + ScannerKeepAliveResponsePB.Builder builder = ScannerKeepAliveResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + ScannerKeepAliveResponsePB resp = builder.build(); + TabletServerErrorPB error = null; + if (resp.hasError()) { + if (canBeIgnored(resp.getError().getCode())) { + LOG.info("Ignore false alert of scanner not found for keep alive request"); + } else { + error = resp.getError(); + } + } + return new Pair<>(null, error); + } + } + + /** + * RPC sent out to fetch the next rows from the TabletServer. + */ + final class ScanRequest extends KuduRpc { + + private final State state; + + /** The token with which to authorize this RPC. 
*/ + private Token.SignedTokenPB authzToken; + + ScanRequest(KuduTable table, State state, RemoteTablet tablet) { + super(table, client.getTimer(), scanRequestTimeout); + setTablet(tablet); + this.state = state; + } + + @Override + String serviceName() { + return TABLET_SERVER_SERVICE_NAME; + } + + @Override + String method() { + return "Scan"; + } + + @Override + Collection getRequiredFeatures() { + if (predicates.isEmpty()) { + return ImmutableList.of(); + } else { + return ImmutableList.of(Tserver.TabletServerFeatures.COLUMN_PREDICATES_VALUE); + } + } + + @Override + ReplicaSelection getReplicaSelection() { + return replicaSelection; + } + + @Override + boolean needsAuthzToken() { + return true; + } + + @Override + void bindAuthzToken(Token.SignedTokenPB token) { + authzToken = token; + } + + /** Serializes this request. */ + @Override + Message createRequestPB() { + final ScanRequestPB.Builder builder = ScanRequestPB.newBuilder(); + switch (state) { + case OPENING: + // Save the tablet in the AsyncKuduScanner. This kind of a kludge but it really + // is the easiest way. + AsyncKuduScanner.this.tablet = super.getTablet(); + NewScanRequestPB.Builder newBuilder = NewScanRequestPB.newBuilder(); + newBuilder.setLimit(limit - AsyncKuduScanner.this.numRowsReturned); + newBuilder.addAllProjectedColumns(ProtobufHelper.schemaToListPb(schema)); + newBuilder.setTabletId(UnsafeByteOperations.unsafeWrap(tablet.getTabletIdAsBytes())); + newBuilder.setOrderMode(AsyncKuduScanner.this.getOrderMode()); + newBuilder.setCacheBlocks(cacheBlocks); + + long rowFormatFlags = Tserver.RowFormatFlags.NO_FLAGS_VALUE; + if (rowDataFormat == RowDataFormat.COLUMNAR) { + rowFormatFlags |= Tserver.RowFormatFlags.COLUMNAR_LAYOUT.getNumber(); + } + newBuilder.setRowFormatFlags(rowFormatFlags); + // If the last propagated timestamp is set, send it with the scan. + // For READ_YOUR_WRITES scan, use the propagated timestamp from + // the scanner. + long timestamp; + if (readMode == ReadMode.READ_YOUR_WRITES) { + timestamp = lowerBoundPropagationTimestamp; + } else { + timestamp = table.getAsyncClient().getLastPropagatedTimestamp(); + } + if (timestamp != AsyncKuduClient.NO_TIMESTAMP) { + newBuilder.setPropagatedTimestamp(timestamp); + } + newBuilder.setReadMode(AsyncKuduScanner.this.getReadMode().pbVersion()); + + // if the mode is set to read on snapshot set the snapshot timestamps. 
+ if (AsyncKuduScanner.this.getReadMode() == ReadMode.READ_AT_SNAPSHOT) { + if (AsyncKuduScanner.this.getSnapshotTimestamp() != AsyncKuduClient.NO_TIMESTAMP) { + newBuilder.setSnapTimestamp(AsyncKuduScanner.this.getSnapshotTimestamp()); + } + if (AsyncKuduScanner.this.getStartSnapshotTimestamp() != AsyncKuduClient.NO_TIMESTAMP) { + newBuilder.setSnapStartTimestamp(AsyncKuduScanner.this.getStartSnapshotTimestamp()); + } + } + + if (isFaultTolerant && AsyncKuduScanner.this.lastPrimaryKey.length > 0) { + newBuilder.setLastPrimaryKey(UnsafeByteOperations.unsafeWrap(lastPrimaryKey)); + } + + if (AsyncKuduScanner.this.startPrimaryKey.length > 0) { + newBuilder.setStartPrimaryKey(UnsafeByteOperations.unsafeWrap(startPrimaryKey)); + } + + if (AsyncKuduScanner.this.endPrimaryKey.length > 0) { + newBuilder.setStopPrimaryKey(UnsafeByteOperations.unsafeWrap(endPrimaryKey)); + } + + for (KuduPredicate pred : predicates.values()) { + newBuilder.addColumnPredicates(pred.toPB()); + } + if (authzToken != null) { + newBuilder.setAuthzToken(authzToken); + } + builder.setNewScanRequest(newBuilder.build()) + .setBatchSizeBytes(batchSizeBytes); + break; + case NEXT: + builder.setScannerId(UnsafeByteOperations.unsafeWrap(scannerId)) + .setCallSeqId(AsyncKuduScanner.this.sequenceId) + .setBatchSizeBytes(batchSizeBytes); + break; + case CLOSING: + builder.setScannerId(UnsafeByteOperations.unsafeWrap(scannerId)) + .setBatchSizeBytes(0) + .setCloseScanner(true); + break; + default: + throw new RuntimeException("unreachable!"); + } + builder.setQueryId(UnsafeByteOperations.unsafeWrap(queryId.getBytes(UTF_8))); + + return builder.build(); + } + + @Override + Pair deserialize(final CallResponse callResponse, + String tsUUID) throws KuduException { + ScanResponsePB.Builder builder = ScanResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + ScanResponsePB resp = builder.build(); + final byte[] id = resp.getScannerId().toByteArray(); + TabletServerErrorPB error = resp.hasError() ? resp.getError() : null; + + // Error handling. + if (error != null) { + if (canBeIgnored(resp.getError().getCode())) { + LOG.info("Ignore false alert of scanner not found for scan request"); + error = null; + } else { + switch (error.getCode()) { + case TABLET_NOT_FOUND: + case TABLET_NOT_RUNNING: + if (state == State.OPENING || (state == State.NEXT && isFaultTolerant)) { + // Doing this will trigger finding the new location. + return new Pair<>(null, error); + } else { + Status statusIncomplete = Status.Incomplete("Cannot continue scanning, " + + "the tablet has moved and this isn't a fault tolerant scan"); + throw new NonRecoverableException(statusIncomplete); + } + case SCANNER_EXPIRED: + if (isFaultTolerant) { + Status status = Status.fromTabletServerErrorPB(error); + throw new FaultTolerantScannerExpiredException(status); + } + // fall through + default: + break; + } + } + } + // TODO: Find a clean way to plumb in reuseRowResult. 
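+ // Pick the iterator that matches the payload in the response: rowwise data when
+ // present, otherwise the columnar-format data.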
+ RowResultIterator iterator; + if (resp.hasData()) { + iterator = RowwiseRowResultIterator.makeRowResultIterator( + timeoutTracker.getElapsedMillis(), tsUUID, schema, resp.getData(), + callResponse, reuseRowResult); + } else { + iterator = ColumnarRowResultIterator.makeRowResultIterator( + timeoutTracker.getElapsedMillis(), tsUUID, schema, resp.getColumnarData(), + callResponse, reuseRowResult); + } + + boolean hasMore = resp.getHasMoreResults(); + if (id.length != 0 && scannerId != null && !Bytes.equals(scannerId, id)) { + Status statusIllegalState = Status.IllegalState("Scan RPC response was for scanner" + + " ID " + Bytes.pretty(id) + " but we expected " + + Bytes.pretty(scannerId)); + throw new NonRecoverableException(statusIllegalState); + } + ResourceMetricsPB resourceMetricsPB = resp.hasResourceMetrics() ? + resp.getResourceMetrics() : null; + Response response = new Response(id, iterator, hasMore, + resp.hasSnapTimestamp() ? resp.getSnapTimestamp() + : AsyncKuduClient.NO_TIMESTAMP, + resp.hasPropagatedTimestamp() ? resp.getPropagatedTimestamp() + : AsyncKuduClient.NO_TIMESTAMP, + resp.getLastPrimaryKey().toByteArray(), resourceMetricsPB); + if (LOG.isDebugEnabled()) { + LOG.debug("{} for scanner {}", response, AsyncKuduScanner.this); + } + return new Pair<>(response, error); + } + + @Override + public String toString() { + return "ScanRequest(scannerId=" + Bytes.pretty(scannerId) + + ", state=" + state + + (tablet != null ? ", tablet=" + tablet.getTabletId() : "") + + ", attempt=" + attempt + ", " + super.toString() + ")"; + } + + @Override + public byte[] partitionKey() { + // This key is used to lookup where the request needs to go + return pruner.nextPartitionKey(); + } + } + + /** + * A Builder class to build {@link AsyncKuduScanner}. + * Use {@link AsyncKuduClient#newScannerBuilder} in order to get a builder instance. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class AsyncKuduScannerBuilder + extends AbstractKuduScannerBuilder { + + AsyncKuduScannerBuilder(AsyncKuduClient client, KuduTable table) { + super(client, table); + } + + /** + * Builds an {@link AsyncKuduScanner} using the passed configurations. + * @return a new {@link AsyncKuduScanner} + */ + @Override + public AsyncKuduScanner build() { + return new AsyncKuduScanner( + client, table, projectedColumnNames, projectedColumnIndexes, readMode, isFaultTolerant, + scanRequestTimeout, predicates, limit, cacheBlocks, prefetching, lowerBoundPrimaryKey, + upperBoundPrimaryKey, startTimestamp, htTimestamp, batchSizeBytes, + PartitionPruner.create(this), replicaSelection, keepAlivePeriodMs, queryId); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduSession.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduSession.java new file mode 100644 index 0000000000..e1a2f65103 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduSession.java @@ -0,0 +1,1106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.client.ExternalConsistencyMode.CLIENT_PROPAGATED; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.concurrent.GuardedBy; +import javax.annotation.concurrent.NotThreadSafe; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import io.netty.util.Timeout; +import io.netty.util.TimerTask; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Schema; +import org.apache.kudu.client.AsyncKuduClient.LookupType; +import org.apache.kudu.util.AsyncUtil; +import org.apache.kudu.util.LogThrottler; +import org.apache.kudu.util.Slice; + +/** + * An {@code AsyncKuduSession} belongs to a specific {@link AsyncKuduClient}, and represents a + * context in which all write data access should take place. Within a session, + * multiple operations may be accumulated and batched together for better + * efficiency. Settings like timeouts, priorities, and trace IDs are also set + * per session. + * + *
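+ * <p>
+ * For illustration only, a minimal sketch of applying a single insert through a session,
+ * given an {@link AsyncKuduClient} named {@code client}; the table and column names are
+ * hypothetical and error handling is elided:
+ * <pre>{@code
+ * AsyncKuduSession session = client.newSession();
+ * KuduTable table = client.openTable("metrics").join();      // hypothetical table
+ * Insert insert = table.newInsert();
+ * insert.getRow().addString("host", "host-1.example.com");   // hypothetical columns
+ * insert.getRow().addLong("value", 42L);
+ * OperationResponse resp = session.apply(insert).join();     // AUTO_FLUSH_SYNC is the default
+ * }</pre>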

{@code AsyncKuduSession} is separate from {@link AsyncKuduClient} because, in a multi-threaded + * application, different threads may need to concurrently execute + * transactions. Similar to a JDBC "session", transaction boundaries will be + * delineated on a per-session basis -- in between a "BeginTransaction" and + * "Commit" call on a given session, all operations will be part of the same + * transaction. Meanwhile another concurrent session object can safely run + * non-transactional work or other transactions without interfering. + * + *

Therefore, this class is not thread-safe. + * + *

Additionally, there is a guarantee that writes from different sessions do not + * get batched together into the same RPCs -- this means that latency-sensitive + * clients can run through the same {@link AsyncKuduClient} object as throughput-oriented + * clients, perhaps by setting the latency-sensitive session's timeouts low and + * priorities high. Without the separation of batches, a latency-sensitive + * single-row insert might get batched along with 10MB worth of inserts from the + * batch writer, thus delaying the response significantly. + * + *
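+ * <p>
+ * As a sketch of that separation, two sessions from the same client can be configured
+ * independently (the values below are illustrative, not recommendations):
+ * <pre>{@code
+ * AsyncKuduSession latencySensitive = client.newSession();
+ * latencySensitive.setTimeoutMillis(200);           // fail fast for interactive writes
+ *
+ * AsyncKuduSession bulk = client.newSession();
+ * bulk.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND);
+ * bulk.setMutationBufferSpace(10000, -1);           // many buffered ops, no byte-size cap
+ * }</pre>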

Timeouts are handled differently depending on the flush mode. + * With {@link SessionConfiguration.FlushMode#AUTO_FLUSH_SYNC AUTO_FLUSH_SYNC}, the timeout is set + * on each {@linkplain #apply apply}()'d operation. + * With {@link SessionConfiguration.FlushMode#AUTO_FLUSH_BACKGROUND AUTO_FLUSH_BACKGROUND} and + * {@link SessionConfiguration.FlushMode#MANUAL_FLUSH MANUAL_FLUSH}, the timeout is assigned to a + * whole batch of operations upon {@linkplain #flush flush}()'ing. It means that in a situation + * with a timeout of 500ms and a flush interval of 1000ms, an operation can be outstanding for up to + * 1500ms before being timed out. + * + *
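+ * <p>
+ * A sketch of the configuration described in that example:
+ * <pre>{@code
+ * AsyncKuduSession session = client.newSession();
+ * session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND);
+ * session.setTimeoutMillis(500);     // timeout applied to each flushed batch
+ * session.setFlushInterval(1000);    // background flush interval
+ * // An operation applied right after a flush may wait close to 1000ms for the next
+ * // background flush and then has the 500ms batch timeout: roughly 1500ms worst case.
+ * }</pre>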

Warning: a note on out-of-order operations + * + *

When using {@code AsyncKuduSession}, it is not difficult to trigger concurrent flushes on + * the same session. The result is that operations applied in a particular order within a single + * session may be applied in a different order on the server side, even for a single tablet. To + * prevent this behavior, ensure that only one flush is outstanding at a given time (the maximum + * concurrent flushes per {@code AsyncKuduSession} is hard-coded to 2). + * + *

If operation interleaving would be unacceptable for your application, consider using one of + * the following strategies to avoid it (a sketch of the first strategy follows the list): + * + *

    + *
  1. When using {@link SessionConfiguration.FlushMode#MANUAL_FLUSH MANUAL_FLUSH} mode, + * wait for one {@link #flush flush()} to {@code join()} before triggering another flush. + *
  2. When using {@link SessionConfiguration.FlushMode#AUTO_FLUSH_SYNC AUTO_FLUSH_SYNC} + * mode, wait for each {@link #apply apply()} to {@code join()} before applying another operation. + *
  3. Consider not using + * {@link SessionConfiguration.FlushMode#AUTO_FLUSH_BACKGROUND AUTO_FLUSH_BACKGROUND} mode. + *
  4. Make your application resilient to out-of-order application of writes. + *
  5. Avoid applying an {@link Operation} on a particular row until any previous write to that + * row has been successfully flushed. + *
+ * + *
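+ * <p>
+ * For illustration, a sketch of the first strategy above ({@code MANUAL_FLUSH} with at most
+ * one outstanding flush); {@code operations} is assumed to exist and error handling is
+ * elided:
+ * <pre>{@code
+ * session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
+ * for (Operation op : operations) {
+ *   session.apply(op);
+ * }
+ * List<OperationResponse> responses = session.flush().join();  // wait before flushing again
+ * }</pre>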

For more information on per-session operation interleaving, see + * KUDU-1767. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +@NotThreadSafe +public class AsyncKuduSession implements SessionConfiguration { + + public static final Logger LOG = LoggerFactory.getLogger(AsyncKuduSession.class); + /** + * Instance of LogThrottler isn't static so we can throttle messages per session + */ + private final LogThrottler throttleClosedLog = new LogThrottler(LOG); + + private final AsyncKuduClient client; + private final Random randomizer = new Random(); + private final ErrorCollector errorCollector; + private int flushIntervalMillis = 1000; + private int mutationBufferMaxOps = 1000; + + // NOTE : -1 means no limit, set a positive value to limit the max size. + private long mutationBufferMaxSize = -1; + private FlushMode flushMode; + private ExternalConsistencyMode consistencyMode; + private long timeoutMillis; + private final long txnId; + + /** + * Protects internal state from concurrent access. {@code AsyncKuduSession} is not threadsafe + * from the application's perspective, but because internally async timers and async flushing + * tasks may access the session concurrently with the application, synchronization is still + * needed. + */ + private final Object monitor = new Object(); + + /** + * Tracks the currently active buffer. + * + * When in mode {@link FlushMode#AUTO_FLUSH_BACKGROUND} or {@link FlushMode#AUTO_FLUSH_SYNC}, + * {@code AsyncKuduSession} uses double buffering to improve write throughput. While the + * application is {@link #apply}ing operations to one buffer (the {@code activeBuffer}), the + * second buffer is either being flushed, or if it has already been flushed, it waits in the + * {@link #inactiveBuffers} queue. When the currently active buffer is flushed, + * {@code activeBuffer} is set to {@code null}. On the next call to {@code apply}, an inactive + * buffer is taken from {@code inactiveBuffers} and made the new active buffer. If both + * buffers are still flushing, then the {@code apply} call throws {@link PleaseThrottleException}. + */ + @GuardedBy("monitor") + private Buffer activeBuffer; + + /** + * The buffers. May either be active (pointed to by {@link #activeBuffer}, + * inactive (in the {@link #inactiveBuffers}) queue, or flushing. + */ + private final Buffer bufferA = new Buffer(); + private final Buffer bufferB = new Buffer(); + + /** + * Queue containing flushed, inactive buffers. May be accessed from callbacks (I/O threads). + * We restrict the session to only two buffers, so {@link BlockingQueue#add} can + * be used without chance of failure. + */ + private final BlockingQueue inactiveBuffers = new ArrayBlockingQueue<>(2, false); + + /** + * Deferred used to notify on flush events. Atomically swapped and completed every time a buffer + * is flushed. This can be used to notify handlers of {@link PleaseThrottleException} that more + * capacity may be available in the active buffer. + */ + private final AtomicReference> flushNotification = + new AtomicReference<>(new Deferred<>()); + + /** + * Tracks whether the session has been closed. + */ + private volatile boolean closed = false; + + private boolean ignoreAllDuplicateRows = false; + private boolean ignoreAllNotFoundRows = false; + + /** + * Cumulative operation metrics since the beginning of the session. 
+ */ + private final ResourceMetrics writeOpMetrics = new ResourceMetrics(); + + /** + * Package-private constructor meant to be used via AsyncKuduClient + * @param client client that creates this session + */ + AsyncKuduSession(AsyncKuduClient client) { + this.client = client; + this.txnId = AsyncKuduClient.INVALID_TXN_ID; + flushMode = FlushMode.AUTO_FLUSH_SYNC; + consistencyMode = CLIENT_PROPAGATED; + timeoutMillis = client.getDefaultOperationTimeoutMs(); + inactiveBuffers.add(bufferA); + inactiveBuffers.add(bufferB); + errorCollector = new ErrorCollector(mutationBufferMaxOps); + } + + /** + * Constructor for a transactional session. + * @param client client that creates this session + * @param txnId transaction identifier for all operations within the session + */ + AsyncKuduSession(AsyncKuduClient client, long txnId) { + assert txnId > AsyncKuduClient.INVALID_TXN_ID; + this.client = client; + this.txnId = txnId; + flushMode = FlushMode.AUTO_FLUSH_SYNC; + consistencyMode = CLIENT_PROPAGATED; + timeoutMillis = client.getDefaultOperationTimeoutMs(); + inactiveBuffers.add(bufferA); + inactiveBuffers.add(bufferB); + errorCollector = new ErrorCollector(mutationBufferMaxOps); + } + + @Override + public FlushMode getFlushMode() { + return this.flushMode; + } + + // TODO(wdberkeley): KUDU-1944. Don't let applications change the flush mode. Use a new session. + @Override + public void setFlushMode(FlushMode flushMode) { + if (hasPendingOperations()) { + throw new IllegalArgumentException("Cannot change flush mode when writes are buffered"); + } + this.flushMode = flushMode; + } + + @Override + public void setExternalConsistencyMode(ExternalConsistencyMode consistencyMode) { + if (hasPendingOperations()) { + throw new IllegalArgumentException("Cannot change consistency mode " + + "when writes are buffered"); + } + this.consistencyMode = consistencyMode; + } + + @Override + public void setMutationBufferSpace(int numOps, long maxSize) { + if (hasPendingOperations()) { + throw new IllegalArgumentException("Cannot change the buffer" + + " size when operations are buffered"); + } + this.mutationBufferMaxOps = numOps; + this.mutationBufferMaxSize = maxSize; + } + + @Override + public void setErrorCollectorSpace(int size) { + this.errorCollector.resize(size); + } + + @Deprecated + @Override + public void setMutationBufferLowWatermark(float mutationBufferLowWatermarkPercentage) { + LOG.warn("setMutationBufferLowWatermark is deprecated"); + } + + /** + * Lets us set a specific seed for tests + * @param seed the seed to use + */ + @InterfaceAudience.LimitedPrivate("Test") + void setRandomSeed(long seed) { + this.randomizer.setSeed(seed); + } + + @Override + public void setFlushInterval(int flushIntervalMillis) { + this.flushIntervalMillis = flushIntervalMillis; + } + + @Override + public void setTimeoutMillis(long timeout) { + this.timeoutMillis = timeout; + } + + @Override + public long getTimeoutMillis() { + return this.timeoutMillis; + } + + @Override + public boolean isClosed() { + return closed; + } + + @Override + public boolean isIgnoreAllDuplicateRows() { + return ignoreAllDuplicateRows; + } + + @Override + public void setIgnoreAllDuplicateRows(boolean ignoreAllDuplicateRows) { + this.ignoreAllDuplicateRows = ignoreAllDuplicateRows; + } + + @Override + public boolean isIgnoreAllNotFoundRows() { + return ignoreAllNotFoundRows; + } + + @Override + public void setIgnoreAllNotFoundRows(boolean ignoreAllNotFoundRows) { + this.ignoreAllNotFoundRows = ignoreAllNotFoundRows; + } + + @Override + public int 
countPendingErrors() { + return errorCollector.countErrors(); + } + + @Override + public RowErrorsAndOverflowStatus getPendingErrors() { + return errorCollector.getErrors(); + } + + @Override + public ResourceMetrics getWriteOpMetrics() { + return this.writeOpMetrics; + } + + /** + * Flushes the buffered operations and marks this session as closed. + * See the javadoc on {@link #flush()} on how to deal with exceptions coming out of this method. + * @return a Deferred whose callback chain will be invoked when. + * everything that was buffered at the time of the call has been flushed. + */ + public Deferred> close() { + if (!closed) { + closed = true; + client.removeSession(this); + } + return flush(); + } + + /** + * Callback which waits for all tablet location lookups to complete, groups all operations into + * batches by tablet, puts operations into extraBatches which have different schemas with the + * ones in batches, then dispatches them. When all of the batches are complete, a deferred is + * fired and the buffer is added to the inactive queue. + */ + private final class TabletLookupCB implements Callback { + private final AtomicInteger lookupsOutstanding; + private final Buffer buffer; + private final Deferred> deferred; + + public TabletLookupCB(Buffer buffer, Deferred> deferred) { + this.lookupsOutstanding = new AtomicInteger(buffer.numOps()); + this.buffer = buffer; + this.deferred = deferred; + } + + @Override + public Void call(Object unused) throws Exception { + if (lookupsOutstanding.decrementAndGet() != 0) { + return null; + } + + // The final tablet lookup is complete. Batch all of the buffered + // operations into their respective tablet, and then send the batches. + + // Group the operations by tablet. If two operations belong to the same + // tablet but have different table schemas, they will be put into two + // separate batches. + Map> batches = new HashMap<>(); + List opsFailedInLookup = new ArrayList<>(); + List opsFailedIndexesList = new ArrayList<>(); + + int currentIndex = 0; + for (BufferedOperation bufferedOp : buffer) { + Operation operation = bufferedOp.getOperation(); + if (bufferedOp.tabletLookupFailed()) { + Exception failure = bufferedOp.getTabletLookupFailure(); + RowError error; + if (failure instanceof NonCoveredRangeException) { + // TODO: this should be something different than NotFound so that + // applications can distinguish from updates on missing rows. + error = new RowError(Status.NotFound(String.format( + "%s: %s", failure.getMessage(), operation.getTable().getName())), operation); + } else { + LOG.warn("unexpected tablet lookup failure for operation {}", operation, failure); + error = new RowError(Status.RuntimeError(failure.getMessage()), operation); + } + OperationResponse response = new OperationResponse(0, null, 0, operation, error); + // Add the row error to the error collector if the session is in background flush mode, + // and complete the operation's deferred with the error response. The ordering between + // adding to the error collector and completing the deferred should not matter since + // applications should be using one or the other method for error handling, not both. 
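+ // For reference, the collector-based path on the application side is a sketch like:
+ //   RowErrorsAndOverflowStatus pending = session.getPendingErrors();
+ //   if (pending.isOverflowed() || pending.getRowErrors().length > 0) { /* handle */ }
+ // while the per-operation path inspects OperationResponse.hasRowError() on each response.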
+ if (flushMode == FlushMode.AUTO_FLUSH_BACKGROUND) { + errorCollector.addError(error); + } + operation.callback(response); + opsFailedInLookup.add(response); + opsFailedIndexesList.add(currentIndex++); + continue; + } + LocatedTablet tablet = bufferedOp.getTablet(); + Slice tabletId = new Slice(tablet.getTabletId()); + + List batchList = batches.get(tabletId); + if (batchList == null) { + Batch batch = new Batch(operation.getTable(), tablet, ignoreAllDuplicateRows, + ignoreAllNotFoundRows, txnId); + batch.add(operation, currentIndex++); + List list = new ArrayList<>(); + list.add(batch); + batches.put(tabletId, list); + continue; + } + // Compare with the last schema in the list, because the last operations + // have the same schemas with it most likely. + Batch lastBatch = batchList.get(batchList.size() - 1); + if (lastBatch.operations.get(0).table.getSchema() + .equals(operation.table.getSchema())) { + lastBatch.add(operation, currentIndex++); + continue; + } + // Put it into a separate batch. + Batch batch = new Batch(operation.getTable(), tablet, ignoreAllDuplicateRows, + ignoreAllNotFoundRows, txnId); + batch.add(operation, currentIndex++); + batchList.add(batch); + } + int batchSize = 0; + for (List batchList : batches.values()) { + batchSize += batchList.size(); + } + List> batchResponses = new ArrayList<>(batchSize + 1); + if (!opsFailedInLookup.isEmpty()) { + batchResponses.add( + Deferred.fromResult(new BatchResponse(opsFailedInLookup, opsFailedIndexesList))); + } + + for (List batchList : batches.values()) { + for (Batch batch : batchList) { + if (timeoutMillis != 0) { + batch.resetTimeoutMillis(client.getTimer(), timeoutMillis); + } + addBatchCallbacks(batch); + batchResponses.add(client.sendRpcToTablet(batch)); + } + } + + // On completion of all batches, fire the completion deferred, and add the buffer + // back to the inactive buffers queue. This frees it up for new inserts. + AsyncUtil.addBoth( + Deferred.group(batchResponses), + new Callback() { + @Override + public Void call(Object responses) { + queueBuffer(buffer); + deferred.callback(responses); + return null; + } + }); + + return null; + } + + /** + * Creates callbacks to handle a multi-put and adds them to the request. + * @param request the request for which we must handle the response + */ + private void addBatchCallbacks(final Batch request) { + final class BatchCallback implements Callback { + @Override + public BatchResponse call(final BatchResponse response) { + LOG.trace("Got a Batch response for {} rows", request.operations.size()); + AsyncKuduSession.this.client.updateLastPropagatedTimestamp(response.getWriteTimestamp()); + + // Send individualized responses to all the operations in this batch. + for (OperationResponse operationResponse : response.getIndividualResponses()) { + if (flushMode == FlushMode.AUTO_FLUSH_BACKGROUND && operationResponse.hasRowError()) { + errorCollector.addError(operationResponse.getRowError()); + } + + // Fire the callback after collecting the errors so that the errors + // are visible should the callback interrogate the error collector. + operationResponse.getOperation().callback(operationResponse); + } + writeOpMetrics.update(response.getWriteOpMetrics()); + + return response; + } + + @Override + public String toString() { + return "apply batch response"; + } + } + + final class BatchErrCallback implements Callback { + @Override + public Object call(Exception e) { + // If the exception we receive is a KuduException we're going to build OperationResponses. 
+ Status status = null; + List responses = null; + boolean handleKuduException = e instanceof KuduException; + if (handleKuduException) { + status = ((KuduException) e).getStatus(); + responses = new ArrayList<>(request.operations.size()); + } + + for (Operation operation : request.operations) { + // Same comment as in BatchCallback regarding the ordering of when to callback. + if (handleKuduException) { + RowError rowError = new RowError(status, operation); + OperationResponse response = new OperationResponse(0, null, 0, operation, rowError); + errorCollector.addError(rowError); + responses.add(response); + + operation.callback(response); + } else { + // We have no idea what the exception is so we'll just send it up. + operation.errback(e); + } + } + + // Note that returning an object that's not an exception will make us leave the + // errback chain. Effectively, the BatchResponse below will end up as part of the list + // passed to ConvertBatchToListOfResponsesCB. + return handleKuduException ? new BatchResponse(responses, request.operationIndexes) : e; + } + + @Override + public String toString() { + return "apply batch error response"; + } + } + + request.getDeferred().addCallbacks(new BatchCallback(), new BatchErrCallback()); + } + + /** + * Returns a buffer to the inactive queue after flushing. + * @param buffer the buffer to return to the inactive queue. + */ + private void queueBuffer(Buffer buffer) { + if (buffer.callbackFlushNotification()) { + inactiveBuffers.add(buffer); + } + Deferred localFlushNotification = flushNotification.getAndSet(new Deferred<>()); + localFlushNotification.callback(null); + } + } + + /** + * Flush buffered writes. + * @return a {@link Deferred} whose callback chain will be invoked when all applied operations at + * the time of the call have been flushed. + */ + public Deferred> flush() { + Buffer buffer; + Deferred nonActiveBufferFlush; + synchronized (monitor) { + nonActiveBufferFlush = getNonActiveFlushNotificationUnlocked(); + buffer = retireActiveBufferUnlocked(); + } + + // TODO(wdb): If there is a buffer flushing already, this code will wait for it to finish before + // flushing 'buffer'. This is less performant but has less surprising semantics than + // simultaneously flushing two buffers. Even though we don't promise those semantics, + // I'm going to leave it this way for now because it's never caused any trouble. + return AsyncUtil.addBothDeferring(nonActiveBufferFlush, unused -> doFlush(buffer)); + } + + /** + * Flushes a write buffer. This method takes ownership of 'buffer', no other concurrent access + * is allowed. 'buffer' is allowed to be null. + * + * @param buffer the buffer to flush, must not be modified once passed to this method + * @return the operation responses + */ + private Deferred> doFlush(Buffer buffer) { + if (buffer == null || buffer.isEmpty()) { + return Deferred.fromResult(ImmutableList.of()); + } + LOG.debug("flushing buffer: {}", buffer); + + Deferred> batchResponses = new Deferred<>(); + Callback tabletLookupCB = new TabletLookupCB(buffer, batchResponses); + + for (BufferedOperation bufferedOperation : buffer) { + AsyncUtil.addBoth(bufferedOperation.getTabletLookup(), tabletLookupCB); + } + + return batchResponses.addCallback(ConvertBatchToListOfResponsesCB.getInstance()); + } + + /** + * Callback used to send a list of OperationResponse instead of BatchResponse since the + * latter is an implementation detail. 
+ */ + private static class ConvertBatchToListOfResponsesCB implements Callback, + List> { + private static final ConvertBatchToListOfResponsesCB INSTANCE = + new ConvertBatchToListOfResponsesCB(); + + @Override + public List call(List batchResponses) throws Exception { + // First compute the size of the union of all the lists so that we don't trigger expensive + // list growths while adding responses to it. + int size = 0; + for (BatchResponse batchResponse : batchResponses) { + size += batchResponse.getIndividualResponses().size(); + } + + OperationResponse[] responses = new OperationResponse[size]; + for (BatchResponse batchResponse : batchResponses) { + List responseList = batchResponse.getIndividualResponses(); + List indexList = batchResponse.getResponseIndexes(); + for (int i = 0; i < indexList.size(); i++) { + int index = indexList.get(i); + assert responses[index] == null; + responses[index] = responseList.get(i); + } + } + + return Arrays.asList(responses); + } + + @Override + public String toString() { + return "ConvertBatchToListOfResponsesCB"; + } + + public static ConvertBatchToListOfResponsesCB getInstance() { + return INSTANCE; + } + } + + @Override + public boolean hasPendingOperations() { + synchronized (monitor) { + return activeBuffer == null ? inactiveBuffers.size() < 2 : + !activeBuffer.isEmpty() || !inactiveBufferAvailable(); + } + } + + // TODO(wdberkeley): Get rid of the idea of an Operation as a distinct way to do a write. Replace + // it with a single-operation Batch. + private Deferred doAutoFlushSync(final Operation operation) { + if (timeoutMillis != 0) { + operation.resetTimeoutMillis(client.getTimer(), timeoutMillis); + } + operation.setExternalConsistencyMode(consistencyMode); + operation.setIgnoreAllDuplicateRows(ignoreAllDuplicateRows); + operation.setIgnoreAllNotFoundRows(ignoreAllNotFoundRows); + operation.setTxnId(txnId); + + return client.sendRpcToTablet(operation) + .addCallbackDeferring(resp -> { + client.updateLastPropagatedTimestamp(resp.getWriteTimestampRaw()); + writeOpMetrics.update(resp.getWriteOpMetrics()); + return Deferred.fromResult(resp); + }) + .addErrback(new SingleOperationErrCallback(operation)); + } + + private boolean isExcessMaxSize(long size) { + return mutationBufferMaxSize >= 0 && size >= mutationBufferMaxSize; + } + + /** + * Check the buffer and determine whether a flush operation needs to be performed. + * @param activeBufferOps the number of active buffer ops + * @param activeBufferSize the number of active buffer byte size + * @return true if the flush in need. + */ + private boolean needFlush(int activeBufferOps, long activeBufferSize) { + return activeBufferOps >= mutationBufferMaxOps || isExcessMaxSize(activeBufferSize); + } + + /** + * Apply the given operation. + *
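+ * <p>
+ * For illustration, a sketch of one simple way to apply many operations while honoring
+ * back-pressure: if {@code apply()} throws {@link PleaseThrottleException}, wait for the
+ * outstanding flushes and retry ({@code operations} is assumed to exist, other error
+ * handling is elided):
+ * <pre>{@code
+ * for (Operation op : operations) {
+ *   try {
+ *     session.apply(op);
+ *   } catch (PleaseThrottleException e) {
+ *     session.flush().join();   // wait for buffer space to free up
+ *     session.apply(op);        // retry the throttled operation
+ *   }
+ * }
+ * }</pre>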

+ * The behavior of this method depends on the configured + * {@link SessionConfiguration.FlushMode FlushMode}. Regardless + * of flush mode, however, {@code apply()} may begin to perform processing in the background + * for the call (e.g looking up the tablet location, etc). + * @param operation operation to apply + * @return a Deferred to track this operation + * @throws KuduException if an error happens or {@link PleaseThrottleException} is triggered + * @see SessionConfiguration.FlushMode FlushMode + */ + public Deferred apply(final Operation operation) throws KuduException { + Preconditions.checkNotNull(operation, "Cannot apply a null operation"); + Preconditions.checkArgument(operation.getTable().getAsyncClient() == client, + "Applied operations must be created from a KuduTable instance opened " + + "from the same client that opened this KuduSession"); + // We do not want to have auto-incrementing column set for INSERT operations. + if (operation.getRow().getSchema().hasAutoIncrementingColumn() && + operation.getRow().isSet(Schema.getAutoIncrementingColumnName()) && + (operation.getChangeType() == Operation.ChangeType.INSERT || + operation.getChangeType() == Operation.ChangeType.INSERT_IGNORE)) { + throw new IllegalArgumentException("Auto-Incrementing column should not " + + "be specified for INSERT operation"); + } + if (closed) { + // Ideally this would be a precondition, but that may break existing + // clients who have grown to rely on this unsafe behavior. + throttleClosedLog.warn(60L, "Applying an operation in a closed session; this is unsafe"); + } + + // Freeze the row so that the client cannot concurrently modify it while it is in flight. + operation.getRow().freeze(); + + // If immediate flush mode, send the operation directly. + if (flushMode == FlushMode.AUTO_FLUSH_SYNC) { + return doAutoFlushSync(operation); + } + + // Kick off a location lookup. + Deferred tablet = client.getTabletLocation(operation.getTable(), + operation.partitionKey(), + LookupType.POINT, + timeoutMillis); + + // Holds buffers that should be flushed outside the synchronized block, if necessary. + List fullBuffers = new ArrayList<>(); + try { + synchronized (monitor) { + Deferred notification = flushNotification.get(); + if (activeBuffer == null) { + // If the active buffer is null then we recently flushed. Check if there + // is an inactive buffer available to replace as the active. + if (inactiveBufferAvailable()) { + refreshActiveBufferUnlocked(); + } else { + Status statusServiceUnavailable = + Status.ServiceUnavailable("all buffers are currently flushing"); + // This can happen if the user writes into a buffer, flushes it, writes + // into the second, flushes it, and immediately tries to write again. + throw new PleaseThrottleException(statusServiceUnavailable, + null, operation, notification); + } + } + + int activeBufferOps = activeBuffer.numOps(); + long activeBufferSize = activeBuffer.bufferSize(); + switch (flushMode) { + case AUTO_FLUSH_SYNC: { + // This case is handled above and is impossible here. + // TODO(wdberkeley): Handle AUTO_FLUSH_SYNC just like other flush modes. 
+ assert false; + break; + } + case MANUAL_FLUSH: { + if (needFlush(activeBufferOps, activeBufferSize)) { + Status statusIllegalState = + Status.IllegalState("MANUAL_FLUSH is enabled but the buffer is too big"); + throw new NonRecoverableException(statusIllegalState); + } + activeBuffer.addOperation(new BufferedOperation(tablet, operation)); + break; + } + case AUTO_FLUSH_BACKGROUND: { + if (needFlush(activeBufferOps, activeBufferSize)) { + // If the active buffer is full or overflowing, be sure to kick off a flush. + fullBuffers.add(retireActiveBufferUnlocked()); + activeBufferOps = 0; + + if (!inactiveBufferAvailable()) { + Status statusServiceUnavailable = + Status.ServiceUnavailable("All buffers are currently flushing"); + throw new PleaseThrottleException(statusServiceUnavailable, + null, operation, notification); + } + refreshActiveBufferUnlocked(); + } + + // Add the operation to the active buffer, and: + // 1. If it's the first operation in the buffer, start a background flush timer. + // 2. If it filled or overflowed the buffer, kick off a flush. + activeBuffer.addOperation(new BufferedOperation(tablet, operation)); + if (activeBufferOps == 0) { + AsyncKuduClient.newTimeout(client.getTimer(), activeBuffer.getFlusherTask(), + flushIntervalMillis); + } + if (needFlush(activeBufferOps + 1, activeBufferSize + operation.getRow().size()) && + inactiveBufferAvailable()) { + fullBuffers.add(retireActiveBufferUnlocked()); + } + break; + } + default: + throw new IllegalArgumentException("Unexpected flushMode: " + flushMode); + } + } + } finally { + // Flush the buffers outside of the synchronized block, if required. + for (Buffer fullBuffer : fullBuffers) { + doFlush(fullBuffer); + } + } + return operation.getDeferred(); + } + + /** + * Returns {@code true} if there is an inactive buffer available. + * @return true if there is currently an inactive buffer available + */ + private boolean inactiveBufferAvailable() { + return inactiveBuffers.peek() != null; + } + + /** + * Refreshes the active buffer. This should only be called after a + * {@link #flush()} when the active buffer is {@code null}, there is an + * inactive buffer available (see {@link #inactiveBufferAvailable()}, and + * {@link #monitor} is locked. + */ + @GuardedBy("monitor") + private void refreshActiveBufferUnlocked() { + Preconditions.checkState(activeBuffer == null); + activeBuffer = inactiveBuffers.remove(); + activeBuffer.resetUnlocked(); + } + + /** + * Retires the active buffer and returns it. Returns null if there is no active buffer. + * This should only be called if {@link #monitor} is locked. + */ + @GuardedBy("monitor") + private Buffer retireActiveBufferUnlocked() { + Buffer buffer = activeBuffer; + activeBuffer = null; + return buffer; + } + + /** + * Returns a flush notification for the currently non-active buffers. + * This is used during manual {@link #flush} calls to ensure that all buffers (not just the active + * buffer) are fully flushed before completing. + */ + @GuardedBy("monitor") + private Deferred getNonActiveFlushNotificationUnlocked() { + final Deferred notificationA = bufferA.getFlushNotification(); + final Deferred notificationB = bufferB.getFlushNotification(); + if (activeBuffer == null) { + // Both buffers are either flushing or inactive. 
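+ // Chain the two notifications so the returned deferred fires only after buffer A's
+ // notification has completed and then buffer B's notification has completed.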
+ return AsyncUtil.addBothDeferring(notificationA, unused -> notificationB); + } else if (activeBuffer == bufferA) { + return notificationB; + } else { + return notificationA; + } + } + + + /** + * Analogous to BatchErrCallback above but for AUTO_FLUSH_SYNC which doesn't handle lists of + * operations and responses. + */ + private static final class SingleOperationErrCallback implements Callback { + + private final Operation operation; + + private SingleOperationErrCallback(Operation operation) { + this.operation = operation; + } + + @Override + public Object call(Exception e) throws Exception { + if (e instanceof KuduException) { + Status status; + if (e instanceof NonCoveredRangeException) { + status = Status.NotFound(String.format( + "%s: %s", e.getMessage(), operation.getTable().getName())); + } else { + status = ((KuduException) e).getStatus(); + } + return new OperationResponse(0, null, 0, operation, new RowError(status, operation)); + } + return e; + } + } + + /** + * A FlusherTask is created for each active buffer in mode + * {@link FlushMode#AUTO_FLUSH_BACKGROUND}. + */ + private final class FlusherTask implements TimerTask { + @Override + public void run(final Timeout timeout) { + Buffer buffer = null; + synchronized (monitor) { + if (activeBuffer == null) { + return; + } + if (activeBuffer.getFlusherTask() == this) { + buffer = retireActiveBufferUnlocked(); + } + } + + doFlush(buffer); + } + } + + /** + * The {@code Buffer} consists of a list of operations, an optional pointer to a flush task, + * and a flush notification. + * + * The {@link #flusherTask} is used in mode {@link FlushMode#AUTO_FLUSH_BACKGROUND} to point to + * the background flusher task assigned to the buffer when it becomes active and the first + * operation is applied to it. When the flusher task executes after the timeout, it checks + * that the currently active buffer's flusher task points to itself before executing the flush. + * This protects against the background task waking up after one or more manual flushes and + * attempting to flush the active buffer. + * + * The {@link #flushNotification} deferred is used when executing manual {@link #flush}es to + * ensure that non-active buffers are fully flushed. {@code flushNotification} is completed + * when this buffer is successfully flushed. When the buffer is promoted from inactive to active, + * the deferred is replaced with a new one to indicate that the buffer is not yet flushed. + * + * Buffer is externally synchronized. When the active buffer, {@link #monitor} + * synchronizes access to it. + */ + private final class Buffer implements Iterable { + private final List operations = new ArrayList<>(); + + // NOTE: This param is different from operations.size(). + // It's the number of total buffer operation size, mainly used to count the used buffer size. 
+ private long operationSize; + private FlusherTask flusherTask = null; + + private Deferred flushNotification = Deferred.fromResult(null); + private boolean flushNotificationFired = false; + + public void addOperation(BufferedOperation operation) { + operations.add(operation); + operationSize += operation.getOperation().getRow().size(); + } + + @Override + public Iterator iterator() { + return operations.iterator(); + } + + public boolean isEmpty() { + return operations.isEmpty(); + } + + public int numOps() { + return operations.size(); + } + + public long bufferSize() { + return operationSize; + } + + @GuardedBy("monitor") + FlusherTask getFlusherTask() { + if (flusherTask == null) { + flusherTask = new FlusherTask(); + } + return flusherTask; + } + + /** + * Returns a {@link Deferred} which will be completed when this buffer is flushed. If the buffer + * is inactive (its flush is complete and it has been enqueued into {@link #inactiveBuffers}), + * then the deferred will already be complete. + */ + Deferred getFlushNotification() { + return flushNotification; + } + + /** + * Completes the buffer's flush notification. Should be called when + * the buffer has been successfully flushed. + */ + boolean callbackFlushNotification() { + LOG.trace("buffer flush notification fired: {}", this); + if (injectLatencyBufferFlushCb) { + try { + Thread.sleep(randomizer.nextInt(16)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + synchronized (monitor) { + if (flushNotificationFired) { + // Do nothing: the callback has been called already. + return false; + } + flushNotificationFired = true; + flushNotification.callback(null); + } + return true; + } + + /** + * Resets the buffer's internal state. Should be called when the buffer is promoted from + * inactive to active. + */ + @GuardedBy("monitor") + void resetUnlocked() { + LOG.trace("buffer resetUnlocked: {}", this); + operations.clear(); + operationSize = 0; + flushNotification = new Deferred<>(); + flushNotificationFired = false; + flusherTask = null; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("operations", operations.size()) + .add("operationSize", operationSize) + .add("flusherTask", flusherTask) + .add("flushNotification", flushNotification) + .toString(); + } + } + + /** + * Container class holding all the state associated with a buffered operation. + */ + private static final class BufferedOperation { + /** Holds either a {@link LocatedTablet} or the failure exception if the lookup failed. */ + private Object tablet = null; + private final Deferred tabletLookup; + private final Operation operation; + + public BufferedOperation(Deferred tablet, + Operation operation) { + tabletLookup = AsyncUtil.addBoth(tablet, new Callback() { + @Override + public Void call(final Object tablet) { + BufferedOperation.this.tablet = tablet; + return null; + } + }); + this.operation = Preconditions.checkNotNull(operation); + } + + /** + * @return {@code true} if the tablet lookup failed. 
+ */ + public boolean tabletLookupFailed() { + return !(tablet instanceof LocatedTablet); + } + + /** + * @return the located tablet + * @throws ClassCastException if the tablet lookup failed, + * check with {@link #tabletLookupFailed} before calling + */ + public LocatedTablet getTablet() { + return (LocatedTablet) tablet; + } + + /** + * @return the cause of the failed lookup + * @throws ClassCastException if the tablet lookup succeeded, + * check with {@link #tabletLookupFailed} before calling + */ + public Exception getTabletLookupFailure() { + return (Exception) tablet; + } + + public Deferred getTabletLookup() { + return tabletLookup; + } + + public Operation getOperation() { + return operation; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("tablet", tablet) + .add("operation", operation) + .toString(); + } + } + + private static boolean injectLatencyBufferFlushCb = false; + + /** + * Inject latency into {@link Buffer#callbackFlushNotification}. + */ + @InterfaceAudience.LimitedPrivate("Test") + static void injectLatencyBufferFlushCb(boolean injectLatency) { + injectLatencyBufferFlushCb = injectLatency; + LOG.warn("latency injection for Buffer flush notification is {}", + injectLatency ? "enabled" : "disabled"); + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AuthnTokenReacquirer.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AuthnTokenReacquirer.java new file mode 100644 index 0000000000..04e8ff00a9 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AuthnTokenReacquirer.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; +import javax.annotation.concurrent.GuardedBy; + +import com.google.common.collect.Lists; +import com.stumbleupon.async.Callback; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * An utility class to reacquire authentication token when the current one expires. + */ +@InterfaceAudience.Private +final class AuthnTokenReacquirer { + /** The Kudu client object the AuthnTokenReacquirer is bound to. */ + private final AsyncKuduClient client; + + /** A dedicated synchronization object for #queuedRpcs */ + private final Object queuedRpcsLock = new Object(); + + /** + * Container to store information on RPCs affected by authn token expiration error. The RPCs + * will be retried on successful token re-acquisition attempt or their errback() method + * will be called if authn token re-acquisition fails. 
+ */ + @GuardedBy("queuedRpcsLock") + private ArrayList> queuedRpcs = Lists.newArrayList(); + + /** + * Create a new AuthnTokenReacquirer object. + * + * @param client the Kudu client object + */ + AuthnTokenReacquirer(AsyncKuduClient client) { + this.client = client; + } + + /** + * Add information on the RPC which failed due to expired authentication token and requires a new + * authn token to retry. Calling this method triggers authn token re-acquisition if there is not + * active one yet. + * + * @param rpc the RPC which failed due to the expired authn token error + */ + void handleAuthnTokenExpiration(KuduRpc rpc) { + boolean doReacquire = false; + synchronized (queuedRpcsLock) { + if (queuedRpcs.isEmpty()) { + // Using non-emptiness of the container as a state here. If the container is empty, that + // means a new re-acquisition round should be started. If the container is not empty, + // that means the process of token re-acquisition has already been already and the + // elements of the #queuedRpcs container should be processed once a new token + // re-acquisition completes (it could succeed or fail). + // + // TODO(aserbin): introduce a timestamp for the recently acquired authn token, so it would + // not try to re-acquire a token too often if there is a race between clearing + // the container after token acquisition is completed and scheduling token re-acquisition. + doReacquire = true; + } + queuedRpcs.add(rpc); + } + rpc.addTrace(new RpcTraceFrame.RpcTraceFrameBuilder( + rpc.method(), RpcTraceFrame.Action.GET_NEW_AUTHENTICATION_TOKEN_THEN_RETRY) + .build()); + + if (doReacquire) { + reacquireAuthnToken(); + } + } + + private List> swapQueuedRpcs() { + List> rpcList; + synchronized (queuedRpcsLock) { + rpcList = queuedRpcs; + queuedRpcs = Lists.newArrayList(); + } + assert !rpcList.isEmpty(); + return rpcList; + } + + private void reacquireAuthnToken() { + + /* + An utility class providing callbacks for successful completion of authn token re-acqusition. + */ + final class NewAuthnTokenCB implements Callback { + /** + * Callback upon 'successful' completion of an attempt to acquire a new token, + * i.e. an attempt where no exception detected in the code path. + * + * @param tokenAcquired {@code true} if a new token acquired, {@code false} if + * the ConnectToCluster yielded no authn token. + */ + @Override + public Void call(Boolean tokenAcquired) throws Exception { + // TODO(aserbin): do we need to handle a successful re-connect with no token some other way? + retryQueuedRpcs(); + return null; + } + + /** + * Handle the affected RPCs on the completion of authn token re-acquisition. The result authn + * token might be null, so in that case primary credentials will be used for future + * connection negotiations. + */ + void retryQueuedRpcs() { + List> list = swapQueuedRpcs(); + for (KuduRpc rpc : list) { + client.handleRetryableErrorNoDelay(rpc, null); + } + } + } + + /* + * Errback to retry authn token re-acquisition and notify the handle the affected RPCs if the + * re-acquisition failed after some number of retries (currently, it's 5 attempts). + * + * TODO(aserbin): perhaps we should retry indefinitely with increasing backoff, but aggressively + * timeout RPCs in the queue after each failure. 
+ */ + final class NewAuthnTokenErrB implements Callback { + private static final int MAX_ATTEMPTS = 5; + private final NewAuthnTokenCB cb; + private int attempts = 0; + + NewAuthnTokenErrB(NewAuthnTokenCB cb) { + this.cb = cb; + } + + @Override + public Void call(Exception e) { + if (e instanceof RecoverableException && attempts < MAX_ATTEMPTS) { + client.reconnectToCluster(cb, this); + ++attempts; + return null; + } + + Exception reason = new NonRecoverableException(Status.NotAuthorized(String.format( + "cannot re-acquire authentication token after %d attempts (%s)", + MAX_ATTEMPTS, + e.getMessage()))); + failQueuedRpcs(reason); + return null; + } + + /** Handle the affected RPCs if authn token re-acquisition fails. + */ + void failQueuedRpcs(Exception reason) { + List> rpcList = swapQueuedRpcs(); + for (KuduRpc rpc : rpcList) { + rpc.errback(reason); + } + } + } + + final NewAuthnTokenCB newTokenCb = new NewAuthnTokenCB(); + final NewAuthnTokenErrB newTokenErrb = new NewAuthnTokenErrB(newTokenCb); + client.reconnectToCluster(newTokenCb, newTokenErrb); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AuthzTokenCache.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AuthzTokenCache.java new file mode 100644 index 0000000000..c83b1e5b4b --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/AuthzTokenCache.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.Nonnull; +import javax.annotation.concurrent.GuardedBy; +import javax.annotation.concurrent.ThreadSafe; + +import com.google.common.base.Preconditions; +import com.stumbleupon.async.Callback; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.security.Token; + +/** + * Cache for authz tokens received from the master of unbounded capacity. A + * client will receive an authz token upon opening a table and put it into the + * cache. A subsequent operation that requires an authz token (e.g. writes, + * scans) will fetch it from the cache and attach it to the operation request. 
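+ * <p>
+ * A sketch of the intended interaction (the {@code cache} and {@code signedToken} names
+ * below are illustrative):
+ * <pre>{@code
+ * // After opening a table, store the token returned by the master:
+ * cache.put(table.getTableId(), signedToken);
+ * // Later, when building a write or scan request for that table:
+ * Token.SignedTokenPB token = cache.get(table.getTableId());
+ * }</pre>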
+ */ +@ThreadSafe +@InterfaceAudience.Private +public class AuthzTokenCache { + private static class RpcAndException { + final KuduRpc rpc; + final KuduException ex; + + RpcAndException(KuduRpc rpc, KuduException ex) { + this.rpc = rpc; + this.ex = ex; + } + } + + private static final Logger LOG = LoggerFactory.getLogger(AuthzTokenCache.class); + private final AsyncKuduClient client; + + // Map from a table ID to an authz token for that table. + private final ConcurrentHashMap authzTokens = + new ConcurrentHashMap<>(); + + // Map from a table ID that has an in-flight RPC to get a new authz token, to + // the list of RPCs waiting to be retried once that token is received and the + // exception each is handling. + // Note: Unlike the token map which is synchronized to make it threadsafe, + // synchronization of this map also serves to ensure requests for the same + // table ID get grouped together. + @GuardedBy("retriesLock") + private final Map> + retriesForTable = new HashMap<>(); + private final Object retriesLock = new Object(); + + // Number of RPCs sent to retrieve authz tokens. Useful for testing. + private AtomicInteger numRetrievalsSent; + + /** + * Create a new AuthzTokenCache object. + * + * @param client the Kudu client object with which to send requests. + */ + AuthzTokenCache(@Nonnull AsyncKuduClient client) { + this.client = client; + numRetrievalsSent = new AtomicInteger(0); + } + + /** + * Returns the number of RPCs sent to retrieve authz token over the lifetime + * of this cache. + * @return number of RPCs sent + */ + @InterfaceAudience.LimitedPrivate("Test") + int numRetrievalsSent() { + return numRetrievalsSent.get(); + } + + /** + * Puts the given token into the cache. No validation is done on the validity + * or expiration of the token -- that happens on the tablet servers. + * + * @param tableId the table ID the authz token is for + * @param token an authz token to put into the cache + */ + void put(@Nonnull String tableId, @Nonnull Token.SignedTokenPB token) { + authzTokens.put(tableId, token); + } + + /** + * Returns the cached token for the given 'tableId' if one exists. + * + * @param tableId table ID to get an authz token for + * @return the token for the table ID if one exists + */ + Token.SignedTokenPB get(@Nonnull String tableId) { + return authzTokens.get(tableId); + } + + /** + * Returns the list of pending RPCs waiting on a new authz token for the given + * table, clearing the table's entry in the pending map. + * + * @param tableId the table ID whose RPCs should be cleared + * @return the RPCs to be retried for the given table ID and the + */ + private List clearPendingRetries(@Nonnull String tableId) { + List pendingRetries; + synchronized (retriesLock) { + pendingRetries = retriesForTable.remove(tableId); + } + Preconditions.checkState(!pendingRetries.isEmpty(), + "no pending retries for table %s", tableId); + return pendingRetries; + } + + /** + * Sends an RPC to retrieve an authz token for retrying the specified parent + * RPC, calling 'cb' on success and 'eb' on failure. + * + * 'parentRpc' is used for logging and deadline tracking. 
+ * + * @param parentRpc the RPC that is waiting on the authz token + * @param cb callback to be called after receiving a response from the master + * @param eb errback to be called after hitting an exception + */ + private void sendRetrievalForRpc(@Nonnull KuduRpc parentRpc, + @Nonnull Callback cb, + @Nonnull Callback eb) { + String tableId = parentRpc.getTable().getTableId(); + LOG.debug("sending RPC to retrieve token for table ID {}", tableId); + GetTableSchemaRequest retrieveAuthzTokenReq = new GetTableSchemaRequest( + client.getMasterTable(), tableId, /*name=*/null, client.getTimer(), + client.getDefaultAdminOperationTimeoutMs(), /*requiresAuthzTokenSupport=*/true); + retrieveAuthzTokenReq.setParentRpc(parentRpc); + retrieveAuthzTokenReq.timeoutTracker.setTimeout(parentRpc.timeoutTracker.getTimeout()); + numRetrievalsSent.incrementAndGet(); + client.sendRpcToTablet(retrieveAuthzTokenReq).addCallback(cb) + .addErrback(eb); + } + + /** + * Method to call upon receiving an RPC that indicates it had an invalid authz + * token and needs a new one. If there is already an in-flight RPC to retrieve + * a new authz token for the given table, add the 'rpc' to the collection of + * RPCs to be retried once the retrieval completes. + * + * @param rpc the RPC that needs a new authz token + * @param ex error that caused triggered this retrieval + * @param the RPC type + */ + void retrieveAuthzToken(@Nonnull final KuduRpc rpc, @Nonnull final KuduException ex) { + /* + * Handles a response from getting an authz token. + */ + final class NewAuthzTokenCB implements Callback { + private final String tableId; + + public NewAuthzTokenCB(String tableId) { + this.tableId = tableId; + } + + @Override + public Void call(@Nonnull GetTableSchemaResponse resp) throws Exception { + if (resp.getAuthzToken() == null) { + // Note: If we were talking to an old master, we would hit an + // exception earlier in the RPC handling. + throw new NonRecoverableException( + Status.InvalidArgument("no authz token retrieved for " + tableId)); + } + LOG.debug("retrieved authz token for {}", tableId); + put(tableId, resp.getAuthzToken()); + for (RpcAndException rpcAndEx : clearPendingRetries(tableId)) { + client.handleRetryableErrorNoDelay(rpcAndEx.rpc, rpcAndEx.ex); + } + return null; + } + } + + /* + * Handles the case where there was an error getting the new authz token. + */ + final class NewAuthzTokenErrB implements Callback { + private KuduRpc parentRpc; + private final NewAuthzTokenCB cb; + + public NewAuthzTokenErrB(@Nonnull NewAuthzTokenCB cb, @Nonnull KuduRpc parentRpc) { + this.cb = cb; + this.parentRpc = parentRpc; + } + + @Override + public Void call(@Nonnull Exception e) { + String tableId = cb.tableId; + if (e instanceof RecoverableException) { + sendRetrievalForRpc(parentRpc, cb, this); + } else { + for (RpcAndException rpcAndEx : clearPendingRetries(tableId)) { + rpcAndEx.rpc.errback(e); + } + } + return null; + } + } + + final String tableId = rpc.getTable().getTableId(); + RpcAndException rpcAndEx = new RpcAndException(rpc, ex); + synchronized (retriesLock) { + List pendingRetries = retriesForTable.putIfAbsent( + tableId, new ArrayList<>(Arrays.asList(rpcAndEx))); + if (pendingRetries == null) { + // There isn't an in-flight RPC to retrieve a new authz token. 
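+ // Start one now; subsequent failures for the same table will find the pending entry
+ // created above and simply queue themselves behind this retrieval.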
+ NewAuthzTokenCB newTokenCB = new NewAuthzTokenCB(tableId); + NewAuthzTokenErrB newTokenErrB = new NewAuthzTokenErrB(newTokenCB, rpc); + sendRetrievalForRpc(rpc, newTokenCB, newTokenErrB); + } else { + Preconditions.checkState(!pendingRetries.isEmpty(), + "no pending retries for table %s", tableId); + pendingRetries.add(rpcAndEx); + } + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Batch.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Batch.java new file mode 100644 index 0000000000..a248802f84 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Batch.java @@ -0,0 +1,267 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import com.google.common.base.MoreObjects; +import com.google.common.collect.Iterables; +import com.google.protobuf.Message; +import com.google.protobuf.UnsafeByteOperations; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.WireProtocol.AppStatusPB.ErrorCode; +import org.apache.kudu.client.Statistics.Statistic; +import org.apache.kudu.client.Statistics.TabletStatistics; +import org.apache.kudu.security.Token; +import org.apache.kudu.tserver.Tserver; +import org.apache.kudu.tserver.Tserver.TabletServerErrorPB; +import org.apache.kudu.util.Pair; + +/** + * Used internally to group Operations for a single tablet together before sending to the tablet + * server. + */ +@InterfaceAudience.Private +class Batch extends KuduRpc { + + /** Holds batched operations. */ + final List operations = new ArrayList<>(); + /** Holds indexes of operations in the original user's batch. */ + final List operationIndexes = new ArrayList<>(); + + /** The tablet this batch will be routed to. */ + private final LocatedTablet tablet; + + /** The token with which to authorize this RPC. */ + private Token.SignedTokenPB authzToken; + + /** + * This size will be set when serialize is called. It stands for the size of rows in all + * operations in this batch. + */ + private long rowOperationsSizeBytes = 0; + + private final EnumSet ignoredErrors; + + private final long txnId; + + Batch(KuduTable table, LocatedTablet tablet, boolean ignoreAllDuplicateRows, + boolean ignoreAllNotFoundRows, long txnId) { + super(table, null, 0); + // Build a set of ignored errors. 
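+    // Depending on how the session was configured, ALREADY_PRESENT (duplicate
+    // row) and/or NOT_FOUND (missing row) responses are treated as ignorable.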
+ Set ignoredErrors = new HashSet<>(); + if (ignoreAllDuplicateRows) { + ignoredErrors.add(ErrorCode.ALREADY_PRESENT); + } + if (ignoreAllNotFoundRows) { + ignoredErrors.add(ErrorCode.NOT_FOUND); + } + // EnumSet.copyOf doesn't handle an empty set, so handle that case specially. + if (ignoredErrors.isEmpty()) { + this.ignoredErrors = EnumSet.noneOf(ErrorCode.class); + } else { + this.ignoredErrors = EnumSet.copyOf(ignoredErrors); + } + this.tablet = tablet; + this.txnId = txnId; + } + + /** + * Reset the timeout of this batch. + * + * TODO(wdberkeley): The fact we have to do this is a sign an Operation should not subclass + * KuduRpc. + * + * @param timeoutMillis the new timeout of the batch in milliseconds + */ + void resetTimeoutMillis(Timer timer, long timeoutMillis) { + timeoutTracker.reset(); + timeoutTracker.setTimeout(timeoutMillis); + if (timeoutTask != null) { + timeoutTask.cancel(); + } + timeoutTask = AsyncKuduClient.newTimeout(timer, new RpcTimeoutTask(), timeoutMillis); + } + + /** + * Returns the bytes size of this batch's row operations after serialization. + * @return size in bytes + * @throws IllegalStateException thrown if this RPC hasn't been serialized eg sent to a TS + */ + long getRowOperationsSizeBytes() { + if (this.rowOperationsSizeBytes == 0) { + throw new IllegalStateException("This row hasn't been serialized yet"); + } + return this.rowOperationsSizeBytes; + } + + public void add(Operation operation, int index) { + assert Bytes.memcmp(operation.partitionKey(), + tablet.getPartition().getPartitionKeyStart()) >= 0 && + (tablet.getPartition().getPartitionKeyEnd().length == 0 || + Bytes.memcmp(operation.partitionKey(), + tablet.getPartition().getPartitionKeyEnd()) < 0); + + operations.add(operation); + operationIndexes.add(index); + } + + @Override + boolean needsAuthzToken() { + return true; + } + + @Override + void bindAuthzToken(Token.SignedTokenPB token) { + authzToken = token; + } + + @Override + Message createRequestPB() { + final Tserver.WriteRequestPB.Builder builder = + Operation.createAndFillWriteRequestPB(operations); + rowOperationsSizeBytes = (long)builder.getRowOperations().getRows().size() + + (long)builder.getRowOperations().getIndirectData().size(); + builder.setTabletId(UnsafeByteOperations.unsafeWrap(getTablet().getTabletIdAsBytes())); + builder.setExternalConsistencyMode(externalConsistencyMode.pbVersion()); + if (this.propagatedTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + builder.setPropagatedTimestamp(this.propagatedTimestamp); + } + if (authzToken != null) { + builder.setAuthzToken(authzToken); + } + if (this.txnId != AsyncKuduClient.INVALID_TXN_ID) { + builder.setTxnId(this.txnId); + } + return builder.build(); + } + + @Override + String serviceName() { + return TABLET_SERVER_SERVICE_NAME; + } + + @Override + String method() { + return Operation.METHOD; + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + Tserver.WriteResponsePB.Builder builder = Tserver.WriteResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + + List errorsPB = builder.getPerRowErrorsList(); + // Create a new list of errors that doesn't contain ignored error codes. 
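+    // The filtered errors keep their original row indexes, so BatchResponse can
+    // still match each remaining error to the operation it belongs to.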
+ if (!ignoredErrors.isEmpty()) { + List filteredErrors = new ArrayList<>(); + for (Tserver.WriteResponsePB.PerRowErrorPB errorPB : errorsPB) { + if (!ignoredErrors.contains(errorPB.getError().getCode())) { + filteredErrors.add(errorPB); + } + } + errorsPB = filteredErrors; + } + ResourceMetrics metrics = builder.hasResourceMetrics() ? + ResourceMetrics.fromResourceMetricsPB(builder.getResourceMetrics()) : null; + BatchResponse response = new BatchResponse(timeoutTracker.getElapsedMillis(), + tsUUID, + builder.getTimestamp(), + errorsPB, + operations, + operationIndexes, + metrics); + + if (injectedError != null) { + if (injectedlatencyMs > 0) { + try { + Thread.sleep(injectedlatencyMs); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + return new Pair<>(response, injectedError); + } + + return new Pair<>(response, + builder.hasError() ? builder.getError() : null); + } + + @Override + public byte[] partitionKey() { + return tablet.getPartition().getPartitionKeyStart(); + } + + @Override + boolean isRequestTracked() { + return true; + } + + @Override + void updateStatistics(Statistics statistics, BatchResponse response) { + String tabletId = this.getTablet().getTabletId(); + String tableName = this.getTable().getName(); + TabletStatistics tabletStatistics = statistics.getTabletStatistics(tableName, tabletId); + if (response == null) { + tabletStatistics.incrementStatistic(Statistic.OPS_ERRORS, operations.size()); + tabletStatistics.incrementStatistic(Statistic.RPC_ERRORS, 1); + return; + } + tabletStatistics.incrementStatistic(Statistic.WRITE_RPCS, 1); + for (OperationResponse opResponse : response.getIndividualResponses()) { + if (opResponse.hasRowError()) { + tabletStatistics.incrementStatistic(Statistic.OPS_ERRORS, 1); + } else { + tabletStatistics.incrementStatistic(Statistic.WRITE_OPS, 1); + } + } + tabletStatistics.incrementStatistic(Statistic.BYTES_WRITTEN, getRowOperationsSizeBytes()); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("operations", operations.size()) + .add("tablet", tablet) + .add("ignoredErrors", Iterables.toString(ignoredErrors)) + .add("rpc", super.toString()) + .toString(); + } + + private static TabletServerErrorPB injectedError; + private static int injectedlatencyMs; + + /** + * Inject tablet server side error for Batch rpc related tests. + * @param error error response from tablet server + * @param latencyMs blocks response handling thread for some time to simulate + * write latency + */ + @InterfaceAudience.LimitedPrivate("Test") + static void injectTabletServerErrorAndLatency(TabletServerErrorPB error, int latencyMs) { + injectedError = error; + injectedlatencyMs = latencyMs; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BatchResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BatchResponse.java new file mode 100644 index 0000000000..f11f930b3c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BatchResponse.java @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableList; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.tserver.Tserver; + +/** + * Response type for Batch (which is used internally by AsyncKuduSession). + * Provides the Hybrid Time write timestamp returned by the Tablet Server. + */ +@InterfaceAudience.Private +public class BatchResponse extends KuduRpcResponse { + + private final long writeTimestamp; + private final List rowErrors; + private final List individualResponses; + private final List responsesIndexes; + private final ResourceMetrics writeOpMetrics; + + /** + * Package-private constructor to be used by the RPCs. + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param writeTimestamp HT's write timestamp + * @param errorsPB a list of row errors, can be empty + * @param operations the list of operations which created this response + * @param indexes the list of operations' order index + * @param writeOpMetrics the write operation metrics, can be null + */ + BatchResponse(long elapsedMillis, + String tsUUID, + long writeTimestamp, + List errorsPB, + List operations, + List indexes, + ResourceMetrics writeOpMetrics) { + super(elapsedMillis, tsUUID); + this.writeTimestamp = writeTimestamp; + individualResponses = new ArrayList<>(operations.size()); + this.responsesIndexes = indexes; + if (errorsPB.isEmpty()) { + rowErrors = Collections.emptyList(); + } else { + rowErrors = new ArrayList<>(errorsPB.size()); + } + this.writeOpMetrics = writeOpMetrics; + + // Populate the list of individual row responses and the list of row errors. Not all the rows + // maybe have errors, but 'errorsPB' contains them in the same order as the operations that + // were sent. + int currentErrorIndex = 0; + Operation currentOperation; + for (int i = 0; i < operations.size(); i++) { + RowError rowError = null; + currentOperation = operations.get(i); + if (currentErrorIndex < errorsPB.size() && + errorsPB.get(currentErrorIndex).getRowIndex() == i) { + rowError = RowError.fromRowErrorPb(errorsPB.get(currentErrorIndex), + currentOperation, tsUUID); + rowErrors.add(rowError); + currentErrorIndex++; + } + individualResponses.add( + new OperationResponse(currentOperation.timeoutTracker.getElapsedMillis(), + tsUUID, + writeTimestamp, + currentOperation, + rowError)); + } + assert (rowErrors.size() == errorsPB.size()); + assert (individualResponses.size() == operations.size()); + assert (individualResponses.size() == responsesIndexes.size()); + } + + BatchResponse(List individualResponses, List indexes) { + super(0, null); + writeTimestamp = 0; + rowErrors = ImmutableList.of(); + this.individualResponses = individualResponses; + this.responsesIndexes = indexes; + this.writeOpMetrics = null; + } + + /** + * Gives the write timestamp that was returned by the Tablet Server. 
+ * @return a timestamp in milliseconds, 0 if the external consistency mode set in AsyncKuduSession + * wasn't CLIENT_PROPAGATED + */ + public long getWriteTimestamp() { + return writeTimestamp; + } + + /** + * Package-private method to get the individual responses. + * @return a list of OperationResponses + */ + List getIndividualResponses() { + return individualResponses; + } + + /** + * Package-private method to get the responses' order index. + * @return a list of indexes + */ + List getResponseIndexes() { + return responsesIndexes; + } + + /** + * Return the write operation metrics associated with this batch. + * @return write operation metrics associated with this batch, or null if there is none. + */ + @Nullable + ResourceMetrics getWriteOpMetrics() { + return this.writeOpMetrics; + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BeginTransactionRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BeginTransactionRequest.java new file mode 100644 index 0000000000..9e4bff593b --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BeginTransactionRequest.java @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.transactions.TxnManager.BeginTransactionResponsePB; + +import java.util.Collection; +import java.util.List; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.transactions.TxnManager; +import org.apache.kudu.util.Pair; + +/** + * A wrapper class for kudu.transactions.TxnManagerService.BeginTransaction RPC. 
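+ *
+ * The request body is empty; on success the response carries the identifier of
+ * the new transaction and its keepalive interval in milliseconds.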
+ */
+@InterfaceAudience.Private
+class BeginTransactionRequest extends KuduRpc<BeginTransactionResponse> {
+  private static final List<Integer> featureFlags = ImmutableList.of();
+
+  BeginTransactionRequest(KuduTable masterTable, Timer timer, long timeoutMillis) {
+    super(masterTable, timer, timeoutMillis);
+  }
+
+  @Override
+  Message createRequestPB() {
+    return TxnManager.BeginTransactionRequestPB.getDefaultInstance();
+  }
+
+  @Override
+  String serviceName() {
+    return TXN_MANAGER_SERVICE_NAME;
+  }
+
+  @Override
+  String method() {
+    return "BeginTransaction";
+  }
+
+  @Override
+  Pair<BeginTransactionResponse, Object> deserialize(
+      final CallResponse callResponse, String serverUUID) throws KuduException {
+    final BeginTransactionResponsePB.Builder b = BeginTransactionResponsePB.newBuilder();
+    readProtobuf(callResponse.getPBMessage(), b);
+    if (!b.hasError()) {
+      Preconditions.checkState(b.hasTxnId());
+      Preconditions.checkState(b.hasKeepaliveMillis());
+    }
+    BeginTransactionResponse response = new BeginTransactionResponse(
+        timeoutTracker.getElapsedMillis(),
+        serverUUID,
+        b.getTxnId(),
+        b.getKeepaliveMillis());
+    return new Pair<>(response, b.hasError() ? b.getError() : null);
+  }
+
+  @Override
+  Collection<Integer> getRequiredFeatures() {
+    return featureFlags;
+  }
+}
diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BeginTransactionResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BeginTransactionResponse.java
new file mode 100644
index 0000000000..03048bed9f
--- /dev/null
+++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/BeginTransactionResponse.java
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +package org.apache.kudu.client; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class BeginTransactionResponse extends KuduRpcResponse { + private final long txnId; + private final int keepaliveMillis; + + /** + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param serverUUID UUID of the server that sent the response + * @param txnId identifier of the new transaction + * @param keepaliveMillis keepalive interval for the newly started transaction + */ + BeginTransactionResponse( + long elapsedMillis, String serverUUID, long txnId, int keepaliveMillis) { + super(elapsedMillis, serverUUID); + Preconditions.checkArgument(txnId > AsyncKuduClient.INVALID_TXN_ID); + Preconditions.checkArgument(keepaliveMillis >= 0); + this.txnId = txnId; + this.keepaliveMillis = keepaliveMillis; + } + + /** + * @return the identifier of the started transaction + */ + public long txnId() { + return txnId; + } + + /** + * @return the keepalive interval for the started transaction (milliseconds) + */ + public int keepaliveMillis() { + return keepaliveMillis; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Bytes.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Bytes.java new file mode 100644 index 0000000000..d67a440e74 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Bytes.java @@ -0,0 +1,1218 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.apache.kudu.client; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.Serializable; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Comparator; + +import com.google.common.io.BaseEncoding; +import io.netty.buffer.ByteBuf; +import io.netty.util.CharsetUtil; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.util.DecimalUtil; +import org.apache.kudu.util.Slice; + +/** + * Helper functions to manipulate byte arrays. + */ +@InterfaceAudience.Private +public final class Bytes { + + // Two's complement reference: 2^n . + // In this case, 2^64 (so as to emulate a unsigned long) + // from http://stackoverflow.com/questions/10886962/interpret-a-negative-number-as-unsigned-with- + // biginteger-java + private static final BigInteger TWO_COMPL_REF = BigInteger.ONE.shiftLeft(64); + + private static final BigInteger BIGINT32_MAX = BigInteger.valueOf(Integer.MAX_VALUE); + private static final BigInteger BIGINT32_MIN = BigInteger.valueOf(Integer.MIN_VALUE); + private static final BigInteger BIGINT64_MAX = BigInteger.valueOf(Long.MAX_VALUE); + private static final BigInteger BIGINT64_MIN = BigInteger.valueOf(Long.MIN_VALUE); + + private Bytes() { // Can't instantiate. + } + + // -------------------------------- // + // Byte array conversion utilities. // + // -------------------------------- // + + /** + * Reads a boolean from the beginning of the given array. + * @param b The array to read from. + * @return A boolean + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static boolean getBoolean(final byte[] b) { + byte v = getByte(b, 0); + return v == 1; + } + + /** + * Reads a boolean from an offset in the given array. + * @param b The array to read from. + * @param offset The offset into the array. + * @return A boolean + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static boolean getBoolean(final byte[] b, final int offset) { + byte v = getByte(b, offset); + return v == 1; + } + + /** + * Reads a byte from the beginning of the given array. + * @param b The array to read from. + * @return A byte + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static byte getByte(final byte[] b) { + return getByte(b, 0); + } + + /** + * Reads a byte from an offset in the given array. + * @param b The array to read from. + * @return A byte + */ + public static byte getByte(final byte[] b, final int offset) { + return b[offset]; + } + + /** + * Reads an unsigned byte from the beginning of the given array. + * @param b The array to read from. + * @return A positive byte + */ + public static short getUnsignedByte(final byte[] b) { + return getUnsignedByte(b, 0); + } + + /** + * Reads an unsigned byte from an offset in the given array. + * @param b The array to read from. + * @return A positive byte + */ + public static short getUnsignedByte(final byte[] b, final int offset) { + return (short) (b[offset] & 0x00FF); + } + + /** + * Writes an unsigned byte at the beginning of the given array. + * @param b The array to write to. + * @param n An unsigned byte. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setUnsignedByte(final byte[] b, final short n) { + setUnsignedByte(b, n, 0); + } + + /** + * Writes an unsigned byte at an offset in the given array. 
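+   * For example, {@code setUnsignedByte(b, (short) 200, 3)} stores the value
+   * 200 in {@code b[3]} as the byte {@code (byte) 0xC8}.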
+ * @param b The array to write to. + * @param offset The offset in the array to start writing at. + * @param n An unsigned byte. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setUnsignedByte(final byte[] b, final short n, + final int offset) { + b[offset] = (byte) n; + } + + /** + * Creates a new byte array containing an unsigned byte. + * @param n An unsigned byte. + * @return A new byte array containing the given value. + */ + public static byte[] fromUnsignedByte(final short n) { + final byte[] b = new byte[1]; + setUnsignedByte(b, n); + return b; + } + + /** + * Reads a little-endian 2-byte short from the beginning of the given array. + * @param b The array to read from. + * @return A short integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static short getShort(final byte[] b) { + return getShort(b, 0); + } + + /** + * Reads a little-endian 2-byte short from an offset in the given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @return A short integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static short getShort(final byte[] b, final int offset) { + return (short) ((b[offset] & 0xFF) | (b[offset + 1] << 8)); + } + + /** + * Reads a little-endian 2-byte unsigned short from the beginning of the + * given array. + * @param b The array to read from. + * @return A positive short integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static int getUnsignedShort(final byte[] b) { + return getUnsignedShort(b, 0); + } + + /** + * Reads a little-endian 2-byte unsigned short from an offset in the + * given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @return A positive short integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static int getUnsignedShort(final byte[] b, final int offset) { + return getShort(b, offset) & 0x0000FFFF; + } + + /** + * Writes a little-endian 2-byte short at the beginning of the given array. + * @param b The array to write to. + * @param n A short integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setShort(final byte[] b, final short n) { + setShort(b, n, 0); + } + + /** + * Writes a little-endian 2-byte short at an offset in the given array. + * @param b The array to write to. + * @param offset The offset in the array to start writing at. + * @param n A short integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setShort(final byte[] b, final short n, + final int offset) { + b[offset + 0] = (byte) (n >>> 0); + b[offset + 1] = (byte) (n >>> 8); + } + + /** + * Writes a little-endian 2-byte unsigned short at the beginning of the given array. + * @param b The array to write to. + * @param n An unsigned short integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setUnsignedShort(final byte[] b, final int n) { + setUnsignedShort(b, n, 0); + } + + /** + * Writes a little-endian 2-byte unsigned short at an offset in the given array. + * @param b The array to write to. + * @param offset The offset in the array to start writing at. + * @param n An unsigned short integer. + * @throws IndexOutOfBoundsException if the byte array is too small. 
+ */ + public static void setUnsignedShort(final byte[] b, final int n, + final int offset) { + b[offset + 0] = (byte) (n >>> 0); + b[offset + 1] = (byte) (n >>> 8); + } + + /** + * Creates a new byte array containing a little-endian 2-byte short integer. + * @param n A short integer. + * @return A new byte array containing the given value. + */ + public static byte[] fromShort(final short n) { + final byte[] b = new byte[2]; + setShort(b, n); + return b; + } + + /** + * Creates a new byte array containing a little-endian 2-byte unsigned short integer. + * @param n An unsigned short integer. + * @return A new byte array containing the given value. + */ + public static byte[] fromUnsignedShort(final int n) { + final byte[] b = new byte[2]; + setUnsignedShort(b, n); + return b; + } + + /** + * Reads a little-endian 4-byte integer from the beginning of the given array. + * @param b The array to read from. + * @return An integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static int getInt(final byte[] b) { + return getInt(b, 0); + } + + /** + * Reads a little-endian 4-byte integer from an offset in the given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @return An integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static int getInt(final byte[] b, final int offset) { + return (b[offset + 0] & 0xFF) << 0 | + (b[offset + 1] & 0xFF) << 8 | + (b[offset + 2] & 0xFF) << 16 | + (b[offset + 3] & 0xFF) << 24; + } + + /** + * Reads a little-endian 4-byte unsigned integer from the beginning of the + * given array. + * @param b The array to read from. + * @return A positive integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static long getUnsignedInt(final byte[] b) { + return getUnsignedInt(b, 0); + } + + /** + * Reads a little-endian 4-byte unsigned integer from an offset in the + * given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @return A positive integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static long getUnsignedInt(final byte[] b, final int offset) { + return getInt(b, offset) & 0x00000000FFFFFFFFL; + } + + /** + * Writes a little-endian 4-byte int at the beginning of the given array. + * @param b The array to write to. + * @param n An integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setInt(final byte[] b, final int n) { + setInt(b, n, 0); + } + + /** + * Writes a little-endian 4-byte int at an offset in the given array. + * @param b The array to write to. + * @param offset The offset in the array to start writing at. + * @param n An integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setInt(final byte[] b, final int n, final int offset) { + b[offset + 0] = (byte) (n >>> 0); + b[offset + 1] = (byte) (n >>> 8); + b[offset + 2] = (byte) (n >>> 16); + b[offset + 3] = (byte) (n >>> 24); + } + + /** + * Writes a little-endian 4-byte unsigned int at the beginning of the given array. + * @param b The array to write to. + * @param n An unsigned integer. + * @throws IndexOutOfBoundsException if the byte array is too small. 
+ */ + public static void setUnsignedInt(final byte[] b, final long n) { + setUnsignedInt(b, n, 0); + } + + /** + * Writes a little-endian 4-byte unsigned int at an offset in the given array. + * @param b The array to write to. + * @param offset The offset in the array to start writing at. + * @param n An unsigned integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setUnsignedInt(final byte[] b, final long n, final int offset) { + b[offset + 0] = (byte) (n >>> 0); + b[offset + 1] = (byte) (n >>> 8); + b[offset + 2] = (byte) (n >>> 16); + b[offset + 3] = (byte) (n >>> 24); + } + + public static void putVarInt32(final ByteBuffer b, final int v) { + int bee = 128; + if (v < (1 << 7)) { + b.put((byte)v); + } else if (v < (1 << 14)) { + b.put((byte)(v | bee)); + b.put((byte)((v >> 7) | bee)); + } else if (v < (1 << 21)) { + b.put((byte)(v | bee)); + b.put((byte)((v >> 7) | bee)); + b.put((byte)(v >> 14)); + } else if (v < (1 << 28)) { + b.put((byte)(v | bee)); + b.put((byte)((v >> 7) | bee)); + b.put((byte)((v >> 14) | bee)); + b.put((byte)(v >> 21)); + } else { + b.put((byte)(v | bee)); + b.put((byte)((v >> 7) | bee)); + b.put((byte)((v >> 14) | bee)); + b.put((byte)((v >> 21) | bee)); + b.put((byte)(v >> 28)); + } + } + + /** + * Reads a 32-bit variable-length integer value as used in Protocol Buffers. + * @param buf The buffer to read from. + * @return The integer read. + */ + static int readVarInt32(final ByteBuf buf) { + int result = buf.readByte(); + if (result >= 0) { + return result; + } + result &= 0x7F; + result |= buf.readByte() << 7; + if (result >= 0) { + return result; + } + result &= 0x3FFF; + result |= buf.readByte() << 14; + if (result >= 0) { + return result; + } + result &= 0x1FFFFF; + result |= buf.readByte() << 21; + if (result >= 0) { + return result; + } + result &= 0x0FFFFFFF; + final byte b = buf.readByte(); + result |= b << 28; + if (b >= 0) { + return result; + } + throw new IllegalArgumentException("Not a 32 bit varint: " + result + + " (5th byte: " + b + ")"); + } + + public static byte[] fromBoolean(final boolean n) { + final byte[] b = new byte[1]; + b[0] = (byte) (n ? 1 : 0); + return b; + } + + /** + * Creates a new byte array containing a little-endian 4-byte integer. + * @param n An integer. + * @return A new byte array containing the given value. + */ + public static byte[] fromInt(final int n) { + final byte[] b = new byte[4]; + setInt(b, n); + return b; + } + + /** + * Creates a new byte array containing a little-endian 4-byte unsigned integer. + * @param n An unsigned integer. + * @return A new byte array containing the given value. + */ + public static byte[] fromUnsignedInt(final long n) { + final byte[] b = new byte[4]; + setUnsignedInt(b, n); + return b; + } + + /** + * Reads a little-endian 8-byte unsigned long from the beginning of the given array. + * @param b The array to read from. + * @return A long integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static BigInteger getUnsignedLong(final byte[] b) { + return getUnsignedLong(b, 0); + } + + /** + * Reads a little-endian 8-byte unsigned long from an offset in the given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @return A long integer. + * @throws IndexOutOfBoundsException if the byte array is too small. 
+ */ + public static BigInteger getUnsignedLong(final byte[] b, final int offset) { + long l = getLong(b, offset); + BigInteger bi = BigInteger.valueOf(l); + if (bi.compareTo(BigInteger.ZERO) < 0) { + bi = bi.add(TWO_COMPL_REF); + } + return bi; + } + + /** + * Reads a little-endian 8-byte long from the beginning of the given array. + * @param b The array to read from. + * @return A long integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static long getLong(final byte[] b) { + return getLong(b, 0); + } + + /** + * Reads a little-endian 8-byte long from an offset in the given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @return A long integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static long getLong(final byte[] b, final int offset) { + return (b[offset + 0] & 0xFFL) << 0 | + (b[offset + 1] & 0xFFL) << 8 | + (b[offset + 2] & 0xFFL) << 16 | + (b[offset + 3] & 0xFFL) << 24 | + (b[offset + 4] & 0xFFL) << 32 | + (b[offset + 5] & 0xFFL) << 40 | + (b[offset + 6] & 0xFFL) << 48 | + (b[offset + 7] & 0xFFL) << 56; + } + + /** + * Writes a little-endian 8-byte long at the beginning of the given array. + * @param b The array to write to. + * @param n A long integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setLong(final byte[] b, final long n) { + setLong(b, n, 0); + } + + /** + * Writes a little-endian 8-byte long at an offset in the given array. + * @param b The array to write to. + * @param n A long integer. + * @param offset The offset in the array to start writing at. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setLong(final byte[] b, final long n, final int offset) { + b[offset + 0] = (byte) (n >>> 0); + b[offset + 1] = (byte) (n >>> 8); + b[offset + 2] = (byte) (n >>> 16); + b[offset + 3] = (byte) (n >>> 24); + b[offset + 4] = (byte) (n >>> 32); + b[offset + 5] = (byte) (n >>> 40); + b[offset + 6] = (byte) (n >>> 48); + b[offset + 7] = (byte) (n >>> 56); + } + + /** + * Writes a little-endian 8-byte unsigned long at the beginning of the given array. + * @param b The array to write to. + * @param n An unsigned long integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setUnsignedLong(final byte[] b, final BigInteger n) { + setUnsignedLong(b, n, 0); + } + + /** + * Writes a little-endian 8-byte unsigned long at an offset in the given array. + * @param b The array to write to. + * @param offset The offset in the array to start writing at. + * @param n An unsigned long integer. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setUnsignedLong(final byte[] b, final BigInteger n, final int offset) { + setLong(b, n.longValue(), offset); + } + + /** + * Creates a new byte array containing a little-endian 8-byte long integer. + * @param n A long integer. + * @return A new byte array containing the given value. + */ + public static byte[] fromLong(final long n) { + final byte[] b = new byte[8]; + setLong(b, n); + return b; + } + + /** + * Creates a new byte array containing a little-endian 8-byte unsigned long integer. + * @param n An unsigned long integer. + * @return A new byte array containing the given value. 
+ */ + public static byte[] fromUnsignedLong(final BigInteger n) { + final byte[] b = new byte[8]; + setUnsignedLong(b, n); + return b; + } + + /** + * Reads a little-endian 16-byte integer from the beginning of the given array. + * @param b The array to read from. + * @return A BigInteger. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static BigInteger getBigInteger(final byte[] b) { + return getBigInteger(b, 0); + } + + /** + * Reads a little-endian 16-byte integer from an offset in the given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @return A BigInteger. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static BigInteger getBigInteger(final byte[] b, final int offset) { + // TODO: Support larger/smaller than 16 bytes (int128) + byte[] bytes = Arrays.copyOfRange(b, offset, offset + 16); + // BigInteger expects big-endian order. + reverseBytes(bytes); + return new BigInteger(bytes); + } + + /** + * Writes a little-endian 16-byte BigInteger at the beginning of the given array. + * @param b The array to write to. + * @param n A BigInteger. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setBigInteger(final byte[] b, final BigInteger n) { + setBigInteger(b, n, 0); + } + + /** + * Writes a little-endian 16-byte BigInteger at an offset in the given array. + * @param b The zeroed byte array to write to. + * @param n A BigInteger. + * @param offset The offset in the array to start writing at. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setBigInteger(final byte[] b, final BigInteger n, final int offset) { + byte[] bytes = n.toByteArray(); + // TODO: Support larger/smaller than 16 bytes (int128) + // Guard against values that are too large. + if (bytes.length > 16) { + throw new IllegalArgumentException("Value is larger than the maximum 16 bytes: " + n); + } + // BigInteger is big-endian order. + reverseBytes(bytes); + System.arraycopy(bytes, 0, b, offset, bytes.length); + // If the value is negative trail with set bits. + if (n.compareTo(BigInteger.ZERO) < 0) { + Arrays.fill(b, offset + bytes.length, offset + 16, (byte) 0xff); + } + } + + /** + * Creates a new byte array containing a little-endian 16-byte BigInteger. + * @param n A BigInteger. + * @return A new byte array containing the given value. + */ + public static byte[] fromBigInteger(final BigInteger n) { + // TODO: Support larger/smaller than 16 bytes (int128) + final byte[] b = new byte[16]; + setBigInteger(b, n); + return b; + } + + /** + * Reverses the passed byte array in place. + * @param b The array to reverse. + */ + private static void reverseBytes(final byte[] b) { + // Swaps the items until the mid-point is reached. + for (int i = 0; i < b.length / 2; i++) { + byte temp = b[i]; + b[i] = b[b.length - i - 1]; + b[b.length - i - 1] = temp; + } + } + + /** + * Reads a little-endian 4-byte float from the beginning of the given array. + * @param b The array to read from. + * @return a float + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static float getFloat(final byte[] b) { + return getFloat(b, 0); + } + + /** + * Reads a little-endian 4-byte float from an offset in the given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. 
+ * @return a float + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static float getFloat(final byte[] b, final int offset) { + return Float.intBitsToFloat(getInt(b, offset)); + } + + /** + * Writes a little-endian 4-byte float at the beginning of the given array. + * @param b The array to write to. + * @param n a float + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setFloat(final byte[] b, final float n) { + setFloat(b, n, 0); + } + + /** + * Writes a little-endian 4-byte float at an offset in the given array. + * @param b The array to write to. + * @param offset The offset in the array to start writing at. + * @param n a float + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setFloat(final byte[] b, final float n, final int offset) { + setInt(b, Float.floatToIntBits(n), offset); + } + + /** + * Creates a new byte array containing a little-endian 4-byte float. + * @param n A float + * @return A new byte array containing the given value. + */ + public static byte[] fromFloat(float n) { + byte[] b = new byte[4]; + setFloat(b, n); + return b; + } + + /** + * Reads a little-endian 8-byte double from the beginning of the given array. + * @param b The array to read from. + * @return a double + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static double getDouble(final byte[] b) { + return getDouble(b, 0); + } + + /** + * Reads a little-endian 8-byte double from an offset in the given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @return a double + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static double getDouble(final byte[] b, final int offset) { + return Double.longBitsToDouble(getLong(b, offset)); + } + + /** + * Writes a little-endian 8-byte double at the beginning of the given array. + * @param b The array to write to. + * @param n a double + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setDouble(final byte[] b, final double n) { + setDouble(b, n, 0); + } + + /** + * Writes a little-endian 8-byte double at an offset in the given array. + * @param b The array to write to. + * @param offset The offset in the array to start writing at. + * @param n a double + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setDouble(final byte[] b, final double n, final int offset) { + setLong(b, Double.doubleToLongBits(n), offset); + } + + /** + * Creates a new byte array containing a little-endian 8-byte double. + * @param n A double + * @return A new byte array containing the given value. + */ + public static byte[] fromDouble(double n) { + byte[] b = new byte[8]; + setDouble(b, n); + return b; + } + + /** + * Reads a decimal from the beginning of the given array. + * @param b The array to read from. + * @param precision The precision of the decimal value. + * @return A BigDecimal. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static BigDecimal getDecimal(final byte[] b, int precision, int scale) { + return getDecimal(b, 0, precision, scale); + } + + /** + * Reads a decimal from the beginning of the given array. + * @param b The array to read from. + * @param offset The offset in the array to start reading from. + * @param precision The precision of the decimal value. + * @return A BigDecimal. 
+ * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static BigDecimal getDecimal(final byte[] b, final int offset, int precision, int scale) { + int size = DecimalUtil.precisionToSize(precision); + switch (size) { + case DecimalUtil.DECIMAL32_SIZE: + int intVal = getInt(b, offset); + return BigDecimal.valueOf(intVal, scale); + case DecimalUtil.DECIMAL64_SIZE: + long longVal = getLong(b, offset); + return BigDecimal.valueOf(longVal, scale); + case DecimalUtil.DECIMAL128_SIZE: + BigInteger int128Val = getBigInteger(b, offset); + return new BigDecimal(int128Val, scale); + default: + throw new IllegalArgumentException("Unsupported decimal type size: " + size); + } + } + + /** + * Writes a BigDecimal at the beginning of the given array. + * + * @param b The array to write to. + * @param n A BigDecimal. + * @param precision The target precision of the decimal value. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setBigDecimal(final byte[] b, final BigDecimal n, int precision) { + setBigDecimal(b, n, precision, 0); + } + + /** + * Writes a BigDecimal at an offset in the given array. + * @param b The array to write to. + * @param n A BigDecimal. + * @param precision The target precision of the decimal value. + * @param offset The offset in the array to start writing at. + * @throws IndexOutOfBoundsException if the byte array is too small. + */ + public static void setBigDecimal(final byte[] b, final BigDecimal n, int precision, + final int offset) { + int size = DecimalUtil.precisionToSize(precision); + BigInteger bigInt = n.unscaledValue(); + switch (size) { + case DecimalUtil.DECIMAL32_SIZE: + // TODO: use n.unscaledValue().intValueExact() when we drop Java7 support. + if (bigInt.compareTo(BIGINT32_MIN) >= 0 && bigInt.compareTo(BIGINT32_MAX) <= 0) { + setInt(b, bigInt.intValue(), offset); + } else { + throw new ArithmeticException("BigInteger out of int range"); + } + break; + case DecimalUtil.DECIMAL64_SIZE: + // TODO: use n.unscaledValue().intValueExact() when we drop Java7 support. + if (bigInt.compareTo(BIGINT64_MIN) >= 0 && bigInt.compareTo(BIGINT64_MAX) <= 0) { + setLong(b, bigInt.longValue(), offset); + } else { + throw new ArithmeticException("BigInteger out of int range"); + } + break; + case DecimalUtil.DECIMAL128_SIZE: + setBigInteger(b, bigInt, offset); + break; + default: + throw new IllegalArgumentException("Unsupported decimal type size: " + size); + } + } + + /** + * Creates a new byte array containing a little-endian BigDecimal. + * @param n A BigDecimal. + * @param precision The target precision of the decimal value. + * @return A new byte array containing the given value. + */ + public static byte[] fromBigDecimal(final BigDecimal n, int precision) { + int size = DecimalUtil.precisionToSize(precision); + switch (size) { + case DecimalUtil.DECIMAL32_SIZE: + return fromInt(n.unscaledValue().intValue()); + case DecimalUtil.DECIMAL64_SIZE: + return fromLong(n.unscaledValue().longValue()); + case DecimalUtil.DECIMAL128_SIZE: + return fromBigInteger(n.unscaledValue()); + default: + throw new IllegalArgumentException("Unsupported decimal type size: " + size); + } + } + + // CHECKSTYLE:OFF + /** Transforms a string into an UTF-8 encoded byte array. */ + public static byte[] UTF8(final String s) { + return s.getBytes(CharsetUtil.UTF_8); + } + + /** Transforms a string into an ISO-8859-1 encoded byte array. 
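+   * For example, {@code ISO88591("abc")} returns the bytes {@code 0x61, 0x62, 0x63}.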
*/ + public static byte[] ISO88591(final String s) { + return s.getBytes(CharsetUtil.ISO_8859_1); + } + // CHECKSTYLE:ON + // ---------------------------- // + // Pretty-printing byte arrays. // + // ---------------------------- // + + private static final char[] HEX = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F' + }; + + /** + * Pretty-prints a byte array into a human-readable output buffer. + * @param outbuf The buffer where to write the output. + * @param array The (possibly {@code null}) array to pretty-print. + */ + public static void pretty(final StringBuilder outbuf, final byte[] array) { + if (array == null) { + outbuf.append("null"); + return; + } + int ascii = 0; + final int start_length = outbuf.length(); + final int n = array.length; + outbuf.ensureCapacity(start_length + 1 + n + 1); + outbuf.append('"'); + for (int i = 0; i < n; i++) { + final byte b = array[i]; + if (' ' <= b && b <= '~') { + ascii++; + outbuf.append((char) b); + } else if (b == '\n') { + outbuf.append('\\').append('n'); + } else if (b == '\t') { + outbuf.append('\\').append('t'); + } else { + outbuf.append("\\x") + .append(HEX[(b >>> 4) & 0x0F]) + .append(HEX[b & 0x0F]); + } + } + if (ascii < n / 2) { + outbuf.setLength(start_length); + outbuf.append(Arrays.toString(array)); + } else { + outbuf.append('"'); + } + } + + /** + * Pretty-prints an array of byte arrays into a human-readable output buffer. + * @param outbuf The buffer where to write the output. + * @param arrays The (possibly {@code null}) array of arrays to pretty-print. + * @since 1.3 + */ + public static void pretty(final StringBuilder outbuf, final byte[][] arrays) { + if (arrays == null) { + outbuf.append("null"); + return; + } else { // Do some right-sizing. + int size = 2; + for (int i = 0; i < arrays.length; i++) { + size += 2 + 2 + arrays[i].length; + } + outbuf.ensureCapacity(outbuf.length() + size); + } + outbuf.append('['); + for (int i = 0; i < arrays.length; i++) { + Bytes.pretty(outbuf, arrays[i]); + outbuf.append(", "); + } + outbuf.setLength(outbuf.length() - 2); // Remove the last ", " + outbuf.append(']'); + } + + /** + * Pretty-prints a byte array into a human-readable string. + * @param array The (possibly {@code null}) array to pretty-print. + * @return The array in a pretty-printed string. + */ + public static String pretty(final byte[] array) { + if (array == null) { + return "null"; + } + final StringBuilder buf = new StringBuilder(1 + array.length + 1); + pretty(buf, array); + return buf.toString(); + } + + /** + * Pretty-prints all the bytes of a buffer into a human-readable string. + * @param buf The (possibly {@code null}) buffer to pretty-print. + * @return The buffer in a pretty-printed string. + */ + public static String pretty(final ByteBuf buf) { + if (buf == null) { + return "null"; + } + byte[] array = new byte[buf.readableBytes()]; + buf.getBytes(buf.readerIndex(), array); + return pretty(array); + } + + /** + * Convert a byte array to a hex encoded string. + * @param bytes the bytes to encode + * @return the hex encoded bytes + */ + public static String hex(byte[] bytes) { + StringBuilder sb = new StringBuilder(2 + bytes.length * 2); + sb.append('0'); + sb.append('x'); + sb.append(BaseEncoding.base16().encode(bytes)); + return sb.toString(); + } + + // ---------------------- // + // Comparing byte arrays. // + // ---------------------- // + // Don't ask me why this isn't in java.util.Arrays. + + /** + * A singleton {@link Comparator} for non-{@code null} byte arrays. 
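+   * For example, a list of encoded keys can be sorted in unsigned
+   * lexicographic order with {@code Collections.sort(keys, Bytes.MEMCMP)}.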
+ * @see #memcmp + */ + public static final MemCmp MEMCMP = new MemCmp(); + + /** {@link Comparator} for non-{@code null} byte arrays. */ + private static final class MemCmp implements Comparator, Serializable { + + private static final long serialVersionUID = 914981342853419168L; + + private MemCmp() { // Can't instantiate outside of this class. + } + + @Override + public int compare(final byte[] a, final byte[] b) { + return memcmp(a, b); + } + + } + + /** + * {@code memcmp} in Java, hooray. + * @param a First non-{@code null} byte array to compare. + * @param b Second non-{@code null} byte array to compare. + * @return 0 if the two arrays are identical, otherwise the difference + * between the first two different bytes, otherwise the different between + * their lengths. + */ + public static int memcmp(final byte[] a, final byte[] b) { + final int length = Math.min(a.length, b.length); + if (a == b) { // Do this after accessing a.length and b.length + return 0; // in order to NPE if either a or b is null. + } + for (int i = 0; i < length; i++) { + if (a[i] != b[i]) { + return (a[i] & 0xFF) - (b[i] & 0xFF); // "promote" to unsigned. + } + } + return a.length - b.length; + } + + /** + * {@code memcmp(3)} with a given offset and length. + * @param a First non-{@code null} byte array to compare. + * @param b Second non-{@code null} byte array to compare. + * @param offset The offset at which to start comparing both arrays. + * @param length The number of bytes to compare. + * @return 0 if the two arrays are identical, otherwise the difference + * between the first two different bytes (treated as unsigned), otherwise + * the different between their lengths. + * @throws IndexOutOfBoundsException if either array isn't large enough. + */ + public static int memcmp(final byte[] a, final byte[] b, + final int offset, int length) { + if (a == b && a != null) { + return 0; + } + length += offset; + for (int i = offset; i < length; i++) { + if (a[i] != b[i]) { + return (a[i] & 0xFF) - (b[i] & 0xFF); // "promote" to unsigned. + } + } + return 0; + } + + /** + * De-duplicates two byte arrays. + *
+ * If two byte arrays have the same contents but are different, this + * function helps to re-use the old one and discard the new copy. + * @param old The existing byte array. + * @param neww The new byte array we're trying to de-duplicate. + * @return {@code old} if {@code neww} is a different array with the same + * contents, otherwise {@code neww}. + */ + public static byte[] deDup(final byte[] old, final byte[] neww) { + return memcmp(old, neww) == 0 ? old : neww; + } + + /** + * Tests whether two byte arrays have the same contents. + * @param a First non-{@code null} byte array to compare. + * @param b Second non-{@code null} byte array to compare. + * @return {@code true} if the two arrays are identical, + * {@code false} otherwise. + */ + public static boolean equals(final byte[] a, final byte[] b) { + return memcmp(a, b) == 0; + } + + /** + * {@code memcmp(3)} in Java for possibly {@code null} arrays, hooray. + * @param a First possibly {@code null} byte array to compare. + * @param b Second possibly {@code null} byte array to compare. + * @return 0 if the two arrays are identical (or both are {@code null}), + * otherwise the difference between the first two different bytes (treated + * as unsigned), otherwise the different between their lengths (a {@code + * null} byte array is considered shorter than an empty byte array). + */ + public static int memcmpMaybeNull(final byte[] a, final byte[] b) { + if (a == null) { + if (b == null) { + return 0; + } + return -1; + } else if (b == null) { + return 1; + } + return memcmp(a, b); + } + + public static int getBitSetSize(int items) { + return (items + 7) / 8; + } + + public static byte[] fromBitSet(BitSet bits, int colCount) { + byte[] bytes = new byte[getBitSetSize(colCount)]; + for (int i = 0; i < bits.length(); i++) { + if (bits.get(i)) { + bytes[i / 8] |= (byte)(1 << (i % 8)); + } + } + return bytes; + } + + public static BitSet toBitSet(byte[] b, int offset, int colCount) { + BitSet bs = new BitSet(colCount); + for (int i = 0; i < colCount; i++) { + if ((b[offset + (i / 8)] >> (i % 8) & 1) == 1) { + bs.set(i); + } + } + return bs; + } + + /** + * This method will apply xor on the left most bit of the provided byte. This is used in Kudu to + * have unsigned data types sorting correctly. 
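+   * For example, {@code xorLeftMostBit((byte) 0x00)} returns {@code (byte) 0x80}
+   * and {@code xorLeftMostBit((byte) 0xFF)} returns {@code (byte) 0x7F}.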
+ * @param value byte whose left most bit will be xor'd + * @return same byte with xor applied on the left most bit + */ + public static byte xorLeftMostBit(byte value) { + value ^= (byte)(1 << 7); + return value; + } + + /** + * Get the byte array representation of this string, with UTF8 encoding + * @param data String get the byte array from + * @return UTF8 byte array + */ + public static byte[] fromString(String data) { + return UTF8(data); + } + + /** + * Get a string from the passed byte array, with UTF8 encoding + * @param b byte array to convert to string, possibly coming from {@link #fromString(String)} + * @return A new string built with the byte array + */ + public static String getString(byte[] b) { + return getString(b, 0, b.length); + } + + public static String getString(Slice slice) { + return slice.toString(CharsetUtil.UTF_8); + } + + /** + * Get a string from the passed byte array, at the specified offset and for the specified + * length, with UTF8 encoding + * @param b byte array to convert to string, possibly coming from {@link #fromString(String)} + * @param offset where to start reading from in the byte array + * @param len how many bytes we should read + * @return A new string built with the byte array + */ + public static String getString(byte[] b, int offset, int len) { + if (len == 0) { + return ""; + } + return new String(b, offset, len, CharsetUtil.UTF_8); + } + + /** + * Utility method to write a byte array to a data output. Equivalent of doing a writeInt of the + * length followed by a write of the byte array. Convert back with {@link #readByteArray} + * @param dataOutput + * @param b + * @throws IOException + */ + public static void writeByteArray(DataOutput dataOutput, byte[] b) throws IOException { + dataOutput.writeInt(b.length); + dataOutput.write(b); + } + + /** + * Utility method to read a byte array written the way {@link #writeByteArray} does it. + * @param dataInput + * @return The read byte array + * @throws IOException + */ + public static byte[] readByteArray(DataInput dataInput) throws IOException { + int len = dataInput.readInt(); + byte[] data = new byte[len]; + dataInput.readFully(data); + return data; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CallResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CallResponse.java new file mode 100644 index 0000000000..855419bbba --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CallResponse.java @@ -0,0 +1,191 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
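For illustration, the sketch below (plain Java, not part of the Kudu sources) exercises the byte-array helpers documented above; it assumes the internal Bytes class lives in org.apache.kudu.client, as the unqualified Bytes.* calls elsewhere in this patch suggest, so it relies on same-package access.

package org.apache.kudu.client;

import java.util.BitSet;

// Illustrative sketch only: shows the unsigned ordering produced by Bytes.memcmp()
// and the bit-set packing round trip. Run with `java -ea` so the asserts fire.
class BytesHelpersDemo {
  public static void main(String[] args) {
    // memcmp compares bytes as unsigned values, so 0x80 (-128 as a signed byte)
    // sorts after 0x7F, matching memcmp(3) semantics.
    byte[] small = { 0x7F };
    byte[] big = { (byte) 0x80 };
    assert Bytes.memcmp(small, big) < 0;   // unsigned ordering
    assert small[0] - big[0] > 0;          // naive signed subtraction would disagree

    // A shorter array sorts before a longer array sharing the same prefix.
    assert Bytes.memcmp(new byte[] {1}, new byte[] {1, 0}) < 0;

    // Bit-set packing round trip: 10 columns need getBitSetSize(10) == 2 bytes.
    BitSet nulls = new BitSet(10);
    nulls.set(0);
    nulls.set(9);
    byte[] packed = Bytes.fromBitSet(nulls, 10);
    assert packed.length == Bytes.getBitSetSize(10);
    assert Bytes.toBitSet(packed, 0, 10).equals(nulls);
  }
}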
+ +package org.apache.kudu.client; + +import java.util.List; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.DefaultByteBufHolder; +import io.netty.channel.ChannelHandlerContext; +import io.netty.handler.codec.ByteToMessageDecoder; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.rpc.RpcHeader; +import org.apache.kudu.util.Slice; + +/** + * This class handles information received from an RPC response, providing + * access to sidecars and decoded protobufs from the message. + */ +@InterfaceAudience.Private +final class CallResponse extends DefaultByteBufHolder { + private final ByteBuf buf; + private final RpcHeader.ResponseHeader header; + private final int totalResponseSize; + + // Non-header main message slice is generated upon request and cached. + private Slice message = null; + + /** + * Performs some sanity checks on the sizes recorded in the packet + * referred to by {@code buf}. Assumes that {@code buf} has not been + * read from yet, and will only be accessed by this class. + * + * Afterwards, this constructs the RpcHeader from the buffer. + * @param buf Byte buffer which call response reads from. + * @throws IndexOutOfBoundsException if any length prefix inside the + * response points outside the bounds of the buffer. + */ + CallResponse(final ByteBuf buf) { + super(buf); + this.buf = buf; + + this.totalResponseSize = buf.readableBytes(); + final int headerSize = Bytes.readVarInt32(buf); + // No needs to bounds-check the size since 'buf' is already sized appropriately. + final Slice headerSlice = nextBytes(buf, headerSize); + RpcHeader.ResponseHeader.Builder builder = RpcHeader.ResponseHeader.newBuilder(); + KuduRpc.readProtobuf(headerSlice, builder); + this.header = builder.build(); + } + + /** + * @return the parsed header + */ + public RpcHeader.ResponseHeader getHeader() { + return this.header; + } + + /** + * @return the total response size + */ + public int getTotalResponseSize() { + return this.totalResponseSize; + } + + /** + * @return A slice pointing to the section of the packet reserved for the main + * protobuf message. + * @throws IllegalStateException If the offset for the main protobuf message + * is not valid. + */ + public Slice getPBMessage() { + cacheMessage(); + final int mainLength = this.header.getSidecarOffsetsCount() == 0 ? + this.message.length() : this.header.getSidecarOffsets(0); + if (mainLength < 0 || mainLength > this.message.length()) { + throw new IllegalStateException("Main protobuf message invalid. " + + "Length is " + mainLength + " while the size of the message " + + "excluding the header is " + this.message.length()); + } + return subslice(this.message, 0, mainLength); + } + + /** + * @param sidecar The index of the sidecar to retrieve. + * @return A slice pointing to the desired sidecar. + * @throws IllegalStateException If the sidecar offsets specified in the + * header response PB are not valid offsets for the array. + * @throws IllegalArgumentException If the sidecar with the specified index + * does not exist. + */ + public Slice getSidecar(int sidecar) { + cacheMessage(); + + List sidecarList = this.header.getSidecarOffsetsList(); + if (sidecar < 0 || sidecar > sidecarList.size()) { + throw new IllegalArgumentException("Sidecar " + sidecar + + " not valid, response has " + sidecarList.size() + " sidecars"); + } + + final int prevOffset = sidecarList.get(sidecar); + final int nextOffset = sidecar + 1 == sidecarList.size() ? 
+ this.message.length() : sidecarList.get(sidecar + 1); + final int length = nextOffset - prevOffset; + + if (prevOffset < 0 || length < 0 || prevOffset + length > this.message.length()) { + throw new IllegalStateException("Sidecar " + sidecar + " invalid " + + "(offset = " + prevOffset + ", length = " + length + "). The size " + + "of the message " + "excluding the header is " + this.message.length()); + } + + return subslice(this.message, prevOffset, length); + } + + // Reads the message after the header if not read yet + private void cacheMessage() { + if (this.message != null) { + return; + } + final int length = Bytes.readVarInt32(buf); + this.message = nextBytes(buf, length); + } + + // Accounts for a parent slice's offset when making a new one with relative offsets. + private static Slice subslice(Slice parent, int offset, int length) { + return new Slice(parent.getRawArray(), parent.getRawOffset() + offset, length); + } + + // After checking the length, generates a slice for the next 'length' + // bytes of 'buf'. Advances the buffer's read index by 'length' bytes. + private static Slice nextBytes(final ByteBuf buf, final int length) { + byte[] payload; + int offset; + if (buf.hasArray()) { // Zero copy. + payload = buf.array(); + offset = buf.arrayOffset() + buf.readerIndex(); + buf.skipBytes(length); + } else { // We have to copy the entire payload out of the buffer :( + payload = new byte[length]; + buf.readBytes(payload); + offset = 0; + } + return new Slice(payload, offset, length); + } + + @Override + public boolean equals(Object o) { + return super.equals(o); + } + + @Override + public int hashCode() { + return super.hashCode(); + } + + /** + * Netty decoder which receives incoming frames (ByteBuf) + * and constructs CallResponse objects. + */ + static class Decoder extends ByteToMessageDecoder { + + Decoder() { + // Only one message is decoded on each read. + setSingleDecode(true); + } + + @Override + protected void decode(ChannelHandlerContext ctx, ByteBuf msg, List out) { + // Increase the reference count because CallResponse holds onto and uses the ByteBuf. + // https://netty.io/wiki/reference-counted-objects.html + msg.retain(); + out.add(new CallResponse(msg)); + } + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnRangePredicate.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnRangePredicate.java new file mode 100644 index 0000000000..ec7e0b8a8d --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnRangePredicate.java @@ -0,0 +1,440 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
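To make the bounds math in CallResponse.getSidecar() above easier to follow, here is a small standalone sketch (plain Java, no Kudu types, not part of the sources): sidecar i starts at the i-th entry of the header's sidecar-offset list and ends at the next entry, or at the end of the message for the last sidecar.

import java.util.Arrays;
import java.util.List;

// Illustrative re-statement of the offset/length derivation performed by getSidecar().
class SidecarOffsetsDemo {
  static int[] sidecarBounds(List<Integer> sidecarOffsets, int messageLength, int sidecar) {
    int start = sidecarOffsets.get(sidecar);
    int end = sidecar + 1 == sidecarOffsets.size()
        ? messageLength                      // last sidecar runs to the end of the message
        : sidecarOffsets.get(sidecar + 1);   // otherwise up to the next sidecar's offset
    if (start < 0 || end < start || end > messageLength) {
      throw new IllegalStateException("invalid sidecar " + sidecar);
    }
    return new int[] { start, end - start }; // offset and length, like the Slice above
  }

  public static void main(String[] args) {
    // A 100-byte message whose main protobuf ends at offset 40, followed by two sidecars.
    List<Integer> offsets = Arrays.asList(40, 70);
    System.out.println(Arrays.toString(sidecarBounds(offsets, 100, 0))); // [40, 30]
    System.out.println(Arrays.toString(sidecarBounds(offsets, 100, 1))); // [70, 30]
  }
}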
+ +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.UnsafeByteOperations; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Type; +import org.apache.kudu.tserver.Tserver; +import org.apache.kudu.util.DecimalUtil; + +/** + * A range predicate on one of the columns in the underlying data. + * Both boundaries are inclusive. + * @deprecated use the {@link KuduPredicate} class instead. + */ +@InterfaceAudience.Public +@Deprecated +public class ColumnRangePredicate { + + private final Tserver.ColumnRangePredicatePB.Builder pb = Tserver.ColumnRangePredicatePB + .newBuilder(); + private final ColumnSchema column; + private byte[] lowerBound = null; + private byte[] upperBound = null; + + /** + * Create the predicate on the specified column + * @param column + */ + public ColumnRangePredicate(ColumnSchema column) { + this.column = column; + this.pb.setColumn(ProtobufHelper.columnToPb(column)); + } + + private void setLowerBoundInternal(byte[] value) { + this.lowerBound = value; + pb.setLowerBound(UnsafeByteOperations.unsafeWrap(this.lowerBound)); + } + + private void setUpperBoundInternal(byte[] value) { + this.upperBound = value; + pb.setInclusiveUpperBound(UnsafeByteOperations.unsafeWrap(this.upperBound)); + } + + /** + * Convert a bound into a {@link KuduPredicate}. + * @param column the column + * @param op the bound comparison operator + * @param bound the bound + * @return the {@code KuduPredicate} + */ + private static KuduPredicate toKuduPredicate(ColumnSchema column, + KuduPredicate.ComparisonOp op, + byte[] bound) { + if (bound == null) { + return null; + } + switch (column.getType().getDataType(column.getTypeAttributes())) { + case BOOL: + return KuduPredicate.newComparisonPredicate(column, op, Bytes.getBoolean(bound)); + case INT8: + return KuduPredicate.newComparisonPredicate(column, op, Bytes.getByte(bound)); + case INT16: + return KuduPredicate.newComparisonPredicate(column, op, Bytes.getShort(bound)); + case INT32: + case DATE: + return KuduPredicate.newComparisonPredicate(column, op, Bytes.getInt(bound)); + case INT64: + case UNIXTIME_MICROS: + return KuduPredicate.newComparisonPredicate(column, op, Bytes.getLong(bound)); + case FLOAT: + return KuduPredicate.newComparisonPredicate(column, op, Bytes.getFloat(bound)); + case DOUBLE: + return KuduPredicate.newComparisonPredicate(column, op, Bytes.getDouble(bound)); + case VARCHAR: + case STRING: + return KuduPredicate.newComparisonPredicate(column, op, Bytes.getString(bound)); + case BINARY: + return KuduPredicate.newComparisonPredicate(column, op, bound); + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + return KuduPredicate.newComparisonPredicate(column, op, + Bytes.getDecimal(bound, typeAttributes.getPrecision(), typeAttributes.getScale())); + default: + throw new IllegalStateException(String.format("unknown column type %s", column.getType())); + } + } + + /** + * Convert this column range predicate into a {@link KuduPredicate}. + * @return the column predicate. 
+ */ + public KuduPredicate toKuduPredicate() { + KuduPredicate lower = + toKuduPredicate(column, KuduPredicate.ComparisonOp.GREATER_EQUAL, lowerBound); + KuduPredicate upper = + toKuduPredicate(column, KuduPredicate.ComparisonOp.LESS_EQUAL, upperBound); + + if (upper != null && lower != null) { + return upper.merge(lower); + } else if (upper != null) { + return upper; + } else { + return lower; + } + } + + /** + * Set a boolean for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(boolean lowerBound) { + checkColumn(Type.BOOL); + setLowerBoundInternal(Bytes.fromBoolean(lowerBound)); + } + + /** + * Set a byte for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(byte lowerBound) { + checkColumn(Type.INT8); + setLowerBoundInternal(new byte[] {lowerBound}); + } + + /** + * Set a short for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(short lowerBound) { + checkColumn(Type.INT16); + setLowerBoundInternal(Bytes.fromShort(lowerBound)); + } + + /** + * Set an int for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(int lowerBound) { + checkColumn(Type.INT32); + setLowerBoundInternal(Bytes.fromInt(lowerBound)); + } + + /** + * Set a long for the lower bound + * + * If 'lowerBound' is a timestamp see {@link PartialRow#addLong(String, long)} for the + * format. + * + * @param lowerBound value for the lower bound + */ + public void setLowerBound(long lowerBound) { + checkColumn(Type.INT64, Type.UNIXTIME_MICROS); + setLowerBoundInternal(Bytes.fromLong(lowerBound)); + } + + /** + * Set a string for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(String lowerBound) { + checkColumn(Type.STRING, Type.VARCHAR); + setLowerBoundInternal(lowerBound.getBytes(UTF_8)); + } + + /** + * Set a binary value for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(byte[] lowerBound) { + checkColumn(Type.BINARY); + setLowerBoundInternal(lowerBound); + } + + /** + * Set a float for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(float lowerBound) { + checkColumn(Type.FLOAT); + setLowerBoundInternal(Bytes.fromFloat(lowerBound)); + } + + /** + * Set a double for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(double lowerBound) { + checkColumn(Type.DOUBLE); + setLowerBoundInternal(Bytes.fromDouble(lowerBound)); + } + + /** + * Set a BigDecimal for the lower bound + * @param lowerBound value for the lower bound + */ + public void setLowerBound(BigDecimal lowerBound) { + checkColumn(Type.DECIMAL); + int precision = column.getTypeAttributes().getPrecision(); + int scale = column.getTypeAttributes().getScale(); + BigDecimal coercedVal = DecimalUtil.coerce(lowerBound, precision, scale); + setLowerBoundInternal(Bytes.fromBigDecimal(coercedVal, precision)); + } + + /** + * Set a boolean for the upper bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(boolean upperBound) { + checkColumn(Type.BOOL); + setUpperBoundInternal(Bytes.fromBoolean(upperBound)); + } + + /** + * Set a byte for the upper bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(byte upperBound) { + checkColumn(Type.INT8); + setUpperBoundInternal(new byte[] {upperBound}); + } + + /** + * Set a short for the upper 
bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(short upperBound) { + checkColumn(Type.INT16); + setUpperBoundInternal(Bytes.fromShort(upperBound)); + } + + /** + * Set an int for the upper bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(int upperBound) { + checkColumn(Type.INT32); + setUpperBoundInternal(Bytes.fromInt(upperBound)); + } + + /** + * Set a long for the upper bound + * + * If 'upperBound' is a timestamp see {@link PartialRow#addLong(String, long)} for the + * format. + * + * @param upperBound value for the upper bound + */ + public void setUpperBound(long upperBound) { + checkColumn(Type.INT64, Type.UNIXTIME_MICROS); + setUpperBoundInternal(Bytes.fromLong(upperBound)); + } + + /** + * Set a string for the upper bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(String upperBound) { + checkColumn(Type.STRING, Type.VARCHAR); + setUpperBoundInternal(upperBound.getBytes(UTF_8)); + } + + /** + * Set a binary value for the upper bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(byte[] upperBound) { + checkColumn(Type.BINARY); + setUpperBoundInternal(upperBound); + } + + /** + * Set a float for the upper bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(float upperBound) { + checkColumn(Type.FLOAT); + setUpperBoundInternal(Bytes.fromFloat(upperBound)); + } + + /** + * Set a double for the upper bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(double upperBound) { + checkColumn(Type.DOUBLE); + setUpperBoundInternal(Bytes.fromDouble(upperBound)); + } + + /** + * Set a BigDecimal for the upper bound + * @param upperBound value for the upper bound + */ + public void setUpperBound(BigDecimal upperBound) { + checkColumn(Type.DECIMAL); + int precision = column.getTypeAttributes().getPrecision(); + int scale = column.getTypeAttributes().getScale(); + BigDecimal coercedVal = DecimalUtil.coerce(upperBound, precision, scale); + setUpperBoundInternal(Bytes.fromBigDecimal(coercedVal, precision)); + } + + /** + * Get the column used by this predicate + * @return the column + */ + public ColumnSchema getColumn() { + return column; + } + + /** + * Get the lower bound in its raw representation + * @return lower bound as a byte array + */ + public byte[] getLowerBound() { + return lowerBound; + } + + /** + * Get the upper bound in its raw representation + * @return upper bound as a byte array + */ + public byte[] getUpperBound() { + return upperBound; + } + + /** + * Converts a list of predicates into an opaque byte array. This is a convenience method for use + * cases that require passing predicates as messages. + * @param predicates a list of predicates + * @return an opaque byte array, or null if the list was empty + */ + public static byte[] toByteArray(List predicates) { + if (predicates.isEmpty()) { + return null; + } + + Tserver.ColumnRangePredicateListPB.Builder predicateListBuilder = + Tserver.ColumnRangePredicateListPB.newBuilder(); + + for (ColumnRangePredicate crp : predicates) { + predicateListBuilder.addRangePredicates(crp.getPb()); + } + + return predicateListBuilder.build().toByteArray(); + } + + /** + * Converts a given byte array to a list of predicates in their pb format. 
+ * @param listBytes bytes obtained from {@link #toByteArray(List)} + * @return a list of predicates + * @throws IllegalArgumentException thrown when the passed bytes aren't valid + */ + static List fromByteArray(byte[] listBytes) { + List predicates = new ArrayList<>(); + if (listBytes == null || listBytes.length == 0) { + return predicates; + } + Tserver.ColumnRangePredicateListPB list = ColumnRangePredicate.getPbFromBytes(listBytes); + return list.getRangePredicatesList(); + } + + /** + * Get the predicate in its protobuf form. + * @return this predicate in protobuf + */ + Tserver.ColumnRangePredicatePB getPb() { + return pb.build(); + } + + /** + * Creates a {@code ColumnRangePredicate} from a protobuf column range predicate message. + * @param pb the protobuf message + * @return a column range predicate + */ + static ColumnRangePredicate fromPb(Tserver.ColumnRangePredicatePB pb) { + ColumnRangePredicate pred = + new ColumnRangePredicate(ProtobufHelper.pbToColumnSchema(pb.getColumn())); + if (pb.hasLowerBound()) { + pred.setLowerBoundInternal(pb.getLowerBound().toByteArray()); + } + if (pb.hasInclusiveUpperBound()) { + pred.setUpperBoundInternal(pb.getInclusiveUpperBound().toByteArray()); + } + return pred; + } + + /** + * Convert a list of predicates given in bytes back to its pb format. It also hides the + * InvalidProtocolBufferException. + */ + private static Tserver.ColumnRangePredicateListPB getPbFromBytes(byte[] listBytes) { + try { + return Tserver.ColumnRangePredicateListPB.parseFrom(listBytes); + } catch (InvalidProtocolBufferException e) { + // We shade our pb dependency so we can't send out the exception above since other modules + // won't know what to expect. + // Intentionally not redacting the list to make this more useful. + throw new IllegalArgumentException("Encountered an invalid column range predicate list: " + + Bytes.pretty(listBytes), e); + } + } + + private void checkColumn(Type... passedTypes) { + for (Type type : passedTypes) { + if (this.column.getType().equals(type)) { + return; + } + } + throw new IllegalArgumentException(String.format("%s's type isn't %s, it's %s", + column.getName(), Arrays.toString(passedTypes), column.getType().getName())); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnarRowResult.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnarRowResult.java new file mode 100644 index 0000000000..721c6b210e --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnarRowResult.java @@ -0,0 +1,311 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
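Because ColumnRangePredicate is retained only for backwards compatibility, a short usage sketch may help map it onto the newer API. This is illustration only, not part of the sources; the ColumnSchema.ColumnSchemaBuilder call is an assumption about the public schema API (it does not appear in this diff), while the predicate methods are the ones defined above.

import java.util.Collections;

import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Type;
import org.apache.kudu.client.ColumnRangePredicate;
import org.apache.kudu.client.KuduPredicate;

// Illustrative sketch of the deprecated range-predicate API and its conversion path.
class ColumnRangePredicateDemo {
  public static void main(String[] args) {
    ColumnSchema nameCol =
        new ColumnSchema.ColumnSchemaBuilder("name", Type.STRING).build();

    ColumnRangePredicate pred = new ColumnRangePredicate(nameCol);
    pred.setLowerBound("apple");   // both bounds are inclusive
    pred.setUpperBound("banana");

    // Internally this merges a GREATER_EQUAL and a LESS_EQUAL comparison predicate.
    KuduPredicate converted = pred.toKuduPredicate();
    System.out.println(converted);

    // Predicates can also be shipped as an opaque byte array between processes.
    byte[] serialized = ColumnRangePredicate.toByteArray(Collections.singletonList(pred));
    System.out.println(serialized.length);
  }
}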
+ +package org.apache.kudu.client; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.sql.Timestamp; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.util.Slice; +import org.apache.kudu.util.TimestampUtil; + +/** + * RowResult represents one row from a scanner, in columnar layout. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +class ColumnarRowResult extends RowResult { + + private final Slice[] data; + private final Slice[] varlenData; + private final Slice[] nonNullBitmaps; + + /** + * Prepares the row representation using the provided data. Doesn't copy data + * out of the byte arrays. Package private. + * @param schema Schema used to build the rowData + * @param data The raw columnar data corresponding to the primitive-typed columns + * @param varlenData The variable-length data for the variable-length-typed columns + * @param nonNullBitmaps The bitmaps corresponding to the non-null status of the cells + * @param rowIndex The index of the row in data/varlenData/nonNullBitmaps + */ + ColumnarRowResult(Schema schema, Slice[] data, Slice[] varlenData, Slice[] nonNullBitmaps, + int rowIndex) { + super(schema, rowIndex); + this.data = data; + this.varlenData = varlenData; + this.nonNullBitmaps = nonNullBitmaps; + advancePointerTo(rowIndex); + } + + /** + * Get the specified column's integer + * @param columnIndex Column index in the schema + * @return an integer + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public int getInt(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.INT32, Type.DATE); + return Bytes.getInt(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + index * 4); + } + + /** + * Get the specified column's short + * @param columnIndex Column index in the schema + * @return a short + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public short getShort(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.INT16); + return Bytes.getShort(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + index * 2); + } + + /** + * Get the specified column's boolean + * @param columnIndex Column index in the schema + * @return a boolean + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public boolean getBoolean(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.BOOL); + byte b = Bytes.getByte(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + index); + return b == 1; + } + + /** + * Get the specified column's byte + * @param columnIndex Column index in the schema + * @return a byte + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't 
exist + */ + @Override + public byte getByte(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.INT8); + return Bytes.getByte(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + index); + } + + /** + * Get the specified column's float + * @param columnIndex Column index in the schema + * @return a float + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public float getFloat(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.FLOAT); + return Bytes.getFloat(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + index * 4); + } + + /** + * Get the specified column's double + * @param columnIndex Column index in the schema + * @return a double + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public double getDouble(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.DOUBLE); + return Bytes.getDouble(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + index * 8); + } + + /** + * Get the specified column's Decimal. + * + * @param columnIndex Column index in the schema + * @return a BigDecimal. + * @throws IllegalArgumentException if the column is null + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public BigDecimal getDecimal(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.DECIMAL); + ColumnSchema column = schema.getColumnByIndex(columnIndex); + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + return Bytes.getDecimal(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + Type.DECIMAL.getSize(typeAttributes) * index, + typeAttributes.getPrecision(), typeAttributes.getScale()); + } + + /** + * Get the specified column's Timestamp. + * + * @param columnIndex Column index in the schema + * @return a Timestamp + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public Timestamp getTimestamp(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.UNIXTIME_MICROS); + long micros = getLong(columnIndex); + return TimestampUtil.microsToTimestamp(micros); + } + + @Override + public String getVarLengthData(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.STRING, Type.VARCHAR); + // C++ puts a Slice in rowData which is 16 bytes long for simplicity, but we only support ints. + int offset = getOffsetForCurrentRow(columnIndex); + int length = getOffsetForNextRow(columnIndex) - offset; + assert offset < Integer.MAX_VALUE; + assert length < Integer.MAX_VALUE; + return Bytes.getString(varlenData[columnIndex].getRawArray(), + varlenData[columnIndex].getRawOffset() + offset, + length); + } + + /** + * Get a copy of the specified column's binary data. + * @param columnIndex Column index in the schema + * @return a byte[] with the binary data. 
+ * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public byte[] getBinaryCopy(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + // C++ puts a Slice in rowData which is 16 bytes long for simplicity, + // but we only support ints. + int offset = getOffsetForCurrentRow(columnIndex); + int length = getOffsetForNextRow(columnIndex) - offset; + assert offset < Integer.MAX_VALUE; + assert length < Integer.MAX_VALUE; + byte[] ret = new byte[length]; + System.arraycopy(varlenData[columnIndex].getRawArray(), + varlenData[columnIndex].getRawOffset() + offset, + ret, 0, length); + return ret; + } + + /** + * Get the specified column's binary data. + * + * This doesn't copy the data and instead returns a ByteBuffer that wraps it. + * + * @param columnIndex Column index in the schema + * @return a ByteBuffer with the binary data. + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public ByteBuffer getBinary(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.BINARY); + // C++ puts a Slice in rowData which is 16 bytes long for simplicity, + // but we only support ints. + int offset = getOffsetForCurrentRow(columnIndex); + int length = getOffsetForNextRow(columnIndex) - offset; + assert offset < Integer.MAX_VALUE; + assert length < Integer.MAX_VALUE; + return ByteBuffer.wrap(varlenData[columnIndex].getRawArray(), + varlenData[columnIndex].getRawOffset() + offset, length); + } + + @Override + public long getLong(int columnIndex) { + return Bytes.getLong(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + index * 8); + } + + protected int getOffsetForCurrentRow(int columnIndex) { + return Bytes.getInt(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + index * 4); + } + + protected int getOffsetForNextRow(int columnIndex) { + return Bytes.getInt(this.data[columnIndex].getRawArray(), + this.data[columnIndex].getRawOffset() + (index + 1) * 4); + } + + /** + * Get if the specified column is NULL + * @param columnIndex Column index in the schema + * @return true if the column cell is null and the column is nullable, + * false otherwise + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public boolean isNull(int columnIndex) { + checkValidColumn(columnIndex); + if (!schema.getColumnByIndex(columnIndex).isNullable()) { + return false; + } + + byte flag = Bytes.getByte(this.nonNullBitmaps[columnIndex].getRawArray(), + this.nonNullBitmaps[columnIndex].getRawOffset() + index / 8); + + boolean nonNull = (flag & (1 << (index % 8))) != 0; + return !nonNull; + } + + @Override + public String toString() { + return "ColumnarRowResult index: " + this.index; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnarRowResultIterator.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnarRowResultIterator.java new file mode 100644 index 0000000000..78d631da58 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ColumnarRowResultIterator.java @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.NoSuchElementException; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Schema; +import org.apache.kudu.WireProtocol; +import org.apache.kudu.util.Slice; + +/** + * Class that contains the rows in columnar layout sent by a tablet server, + * exhausting this iterator only means that all the rows from the last server response were read. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +@SuppressWarnings("IterableAndIterator") +class ColumnarRowResultIterator extends RowResultIterator { + + private static final ColumnarRowResultIterator EMPTY = + new ColumnarRowResultIterator(0, null, null, 0, + null, null, null, false); + + private final Slice[] data; + private final Slice[] varlenData; + private final Slice[] nonNullBitmaps; + private final RowResult sharedRowResult; + + /** + * Package private constructor, only meant to be instantiated from AsyncKuduScanner. + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param tsUUID UUID of the tablet server that handled our request + * @param schema schema used to parse the rows + * @param numRows how many rows are contained in the bs slice + * @param data The raw columnar data corresponding to the primitive-typed columns + * @param varlenData The variable-length data for the variable-length-typed columns + * @param nonNullBitmaps The bitmaps corresponding to the non-null status of the cells + * @param reuseRowResult reuse same row result for next row + */ + ColumnarRowResultIterator(long elapsedMillis, + String tsUUID, + Schema schema, + int numRows, + Slice[] data, + Slice[] varlenData, + Slice[] nonNullBitmaps, + boolean reuseRowResult) { + super(elapsedMillis, tsUUID, schema, numRows); + this.data = data; + this.varlenData = varlenData; + this.nonNullBitmaps = nonNullBitmaps; + this.sharedRowResult = (reuseRowResult && numRows != 0) ? 
+ new ColumnarRowResult(this.schema, data, varlenData, nonNullBitmaps, -1) : + null; + } + + static ColumnarRowResultIterator makeRowResultIterator(long elapsedMillis, + String tsUUID, + Schema schema, + WireProtocol.ColumnarRowBlockPB data, + final CallResponse callResponse, + boolean reuseRowResult) { + if (data == null || data.getNumRows() == 0) { + return new ColumnarRowResultIterator(elapsedMillis, tsUUID, schema, 0, + null, null, null, reuseRowResult); + } + + Slice[] dataSlices = new Slice[data.getColumnsCount()]; + Slice[] varlenDataSlices = new Slice[data.getColumnsCount()]; + Slice[] nonNullBitmapSlices = new Slice[data.getColumnsCount()]; + + for (int i = 0; i < data.getColumnsCount(); i++) { + WireProtocol.ColumnarRowBlockPB.Column column = data.getColumns(i); + dataSlices[i] = callResponse.getSidecar(column.getDataSidecar()); + varlenDataSlices[i] = callResponse.getSidecar(column.getVarlenDataSidecar()); + nonNullBitmapSlices[i] = callResponse.getSidecar(column.getNonNullBitmapSidecar()); + } + int numRows = Math.toIntExact(data.getNumRows()); + + return new ColumnarRowResultIterator(elapsedMillis, tsUUID, schema, numRows, + dataSlices, varlenDataSlices, nonNullBitmapSlices, reuseRowResult); + } + + /** + * @return an empty row result iterator + */ + public static ColumnarRowResultIterator empty() { + return EMPTY; + } + + @Override + public RowResult next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + // If sharedRowResult is not null, we should reuse it for every next call. + if (sharedRowResult != null) { + this.sharedRowResult.advancePointerTo(this.currentRow++); + return sharedRowResult; + } else { + return new ColumnarRowResult(this.schema, this.data, this.varlenData, this.nonNullBitmaps, + this.currentRow++); + } + } + + @Override + public String toString() { + return "RowResultColumnarIterator for " + this.numRows + " rows"; + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CommitTransactionRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CommitTransactionRequest.java new file mode 100644 index 0000000000..0cffde971a --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CommitTransactionRequest.java @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
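The row iterators above are normally consumed through a scanner loop. The sketch below is illustration only; the scanner and client names (KuduClient, openTable, newScannerBuilder, KuduScanner.hasMoreRows/nextRows) are assumptions about the public client API rather than something defined in this diff. Its main point is that when row-result reuse is enabled, next() repositions a shared instance in place, so nothing it returns should be retained across iterations.

import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduScanner;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.RowResult;
import org.apache.kudu.client.RowResultIterator;

// Illustrative scan loop; assumes the public client API named in the lead-in.
class ScanDemo {
  static void scan(KuduClient client, String tableName) throws Exception {
    KuduTable table = client.openTable(tableName);
    KuduScanner scanner = client.newScannerBuilder(table).build();
    try {
      while (scanner.hasMoreRows()) {
        RowResultIterator batch = scanner.nextRows();
        while (batch.hasNext()) {
          RowResult row = batch.next();
          // With row-result reuse (see ColumnarRowResultIterator above), 'row' may be
          // the shared instance that next() repositions, so copy any values that must
          // outlive this iteration instead of keeping a reference to 'row'.
          System.out.println(row.getInt(0));
        }
      }
    } finally {
      scanner.close();
    }
  }
}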
+ +package org.apache.kudu.client; + +import static org.apache.kudu.transactions.TxnManager.CommitTransactionResponsePB; + +import java.util.Collection; +import java.util.List; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.transactions.TxnManager; +import org.apache.kudu.util.Pair; + +/** + * A wrapper class for kudu.transactions.TxnManagerService.CommitTransaction RPC. + */ +@InterfaceAudience.Private +class CommitTransactionRequest extends KuduRpc { + private static final List featureFlags = ImmutableList.of(); + final long txnId; + + CommitTransactionRequest( + KuduTable masterTable, Timer timer, long timeoutMillis, long txnId) { + super(masterTable, timer, timeoutMillis); + Preconditions.checkArgument(txnId > AsyncKuduClient.INVALID_TXN_ID); + this.txnId = txnId; + } + + @Override + Message createRequestPB() { + final TxnManager.CommitTransactionRequestPB.Builder b = + TxnManager.CommitTransactionRequestPB.newBuilder(); + b.setTxnId(txnId); + return b.build(); + } + + @Override + String serviceName() { + return TXN_MANAGER_SERVICE_NAME; + } + + @Override + String method() { + return "CommitTransaction"; + } + + @Override + Pair deserialize( + final CallResponse callResponse, String serverUUID) throws KuduException { + final CommitTransactionResponsePB.Builder b = + CommitTransactionResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), b); + CommitTransactionResponse response = new CommitTransactionResponse( + timeoutTracker.getElapsedMillis(), serverUUID); + return new Pair<>(response, b.hasError() ? b.getError() : null); + } + + @Override + Collection getRequiredFeatures() { + return featureFlags; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CommitTransactionResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CommitTransactionResponse.java new file mode 100644 index 0000000000..297b0f10b3 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CommitTransactionResponse.java @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class CommitTransactionResponse extends KuduRpcResponse { + /** + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param serverUUID UUID of the server that sent the response + */ + CommitTransactionResponse(long elapsedMillis, String serverUUID) { + super(elapsedMillis, serverUUID); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToCluster.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToCluster.java new file mode 100644 index 0000000000..e25ab11c68 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToCluster.java @@ -0,0 +1,396 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import com.google.common.base.Functions; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Common.HostPortPB; +import org.apache.kudu.consensus.Metadata.RaftPeerPB.Role; +import org.apache.kudu.master.Master.ConnectToMasterResponsePB; +import org.apache.kudu.rpc.RpcHeader.ErrorStatusPB.RpcErrorCodePB; +import org.apache.kudu.util.NetUtil; +import org.apache.kudu.util.Pair; + +/** + * Class responsible for fanning out RPCs to all of the configured masters, + * finding a leader, and responding when the leader has been located. + */ +@InterfaceAudience.Private +final class ConnectToCluster { + + private static final Logger LOG = LoggerFactory.getLogger(ConnectToCluster.class); + + private final List masterAddrs; + private final Deferred responseD; + + // Used to avoid calling 'responseD' twice. + private final AtomicBoolean responseDCalled = new AtomicBoolean(false); + + /** Number of responses we've received so far */ + private final AtomicInteger countResponsesReceived = new AtomicInteger(0); + + // Exceptions received so far: kept for debugging purposes. 
+ private final List exceptionsReceived = + Collections.synchronizedList(new ArrayList<>()); + + /** + * If we've received a response from a master which indicates the full + * list of masters in the cluster, it is stored here. Otherwise, null. + */ + private AtomicReference> knownMasters = new AtomicReference<>(); + + private int numMasters; + + /** + * Creates an object that holds the state needed to retrieve master table's location. + * @param masterAddrs Addresses of all master replicas that we want to retrieve the + * registration from. + */ + ConnectToCluster(List masterAddrs) { + this.masterAddrs = masterAddrs; + this.responseD = new Deferred<>(); + this.numMasters = masterAddrs.size(); + } + + @InterfaceAudience.LimitedPrivate("Test") + public Deferred getDeferred() { + return responseD; + } + + @InterfaceAudience.LimitedPrivate("Test") + List getExceptionsReceived() { + return exceptionsReceived; + } + + private static Deferred connectToMaster( + final KuduTable masterTable, + final RpcProxy masterProxy, + KuduRpc parentRpc, + Timer timer, + long defaultTimeoutMs) { + // TODO: Handle the situation when multiple in-flight RPCs all want to query the masters, + // basically reuse in some way the master permits. + long timeoutMillis = parentRpc == null ? defaultTimeoutMs : + parentRpc.timeoutTracker.getMillisBeforeTimeout(); + final ConnectToMasterRequest rpc = + new ConnectToMasterRequest(masterTable, timer, timeoutMillis); + rpc.setParentRpc(parentRpc); + Deferred d = rpc.getDeferred(); + rpc.attempt++; + masterProxy.sendRpc(rpc); + + // If we are connecting to an older version of Kudu, we'll get an invalid request + // error. In that case, we resend using the older version of the RPC. + d.addErrback(new Callback, Exception>() { + @Override + public Deferred call(Exception result) + throws Exception { + if (result instanceof RpcRemoteException) { + RpcRemoteException rre = (RpcRemoteException)result; + if (rre.getErrPB().getCode() == RpcErrorCodePB.ERROR_INVALID_REQUEST && + rre.getErrPB().getUnsupportedFeatureFlagsCount() > 0) { + AsyncKuduClient.LOG.debug("Falling back to GetMasterRegistration() RPC to connect " + + "to server running Kudu < 1.3."); + final Deferred newAttempt = + Preconditions.checkNotNull(rpc.getDeferred()); + rpc.setUseOldMethod(); + masterProxy.sendRpc(rpc); + return newAttempt; + } + } + return Deferred.fromError(result); + } + }); + + return d; + } + + /** + * Locate the leader master and retrieve the cluster information + * (see {@link ConnectToClusterResponse}. 
+ * + * @param masterTable the "placeholder" table used by AsyncKuduClient + * @param masterAddresses the addresses of masters to fetch from + * @param parentRpc RPC that prompted a master lookup, can be null + * @param defaultTimeoutMs timeout to use for RPCs if the parentRpc has no timeout + * @param credentialsPolicy credentials policy to use for connection negotiation + * @return a Deferred object for the cluster connection status + */ + public static Deferred run( + KuduTable masterTable, + List masterAddresses, + KuduRpc parentRpc, + long defaultTimeoutMs, + Connection.CredentialsPolicy credentialsPolicy) { + ConnectToCluster connector = new ConnectToCluster(masterAddresses); + connector.connectToMasters(masterTable, parentRpc, + defaultTimeoutMs, credentialsPolicy); + return connector.responseD; + } + + @InterfaceAudience.LimitedPrivate("Test") + List> connectToMasters( + KuduTable masterTable, + KuduRpc parentRpc, + long defaultTimeoutMs, + Connection.CredentialsPolicy credentialsPolicy) { + // Try to connect to each master. The ConnectToCluster instance + // waits until it gets a good response before firing the returned + // deferred. + List> deferreds = new ArrayList<>(); + List> masterAddrsWithNames = new ArrayList<>(); + for (HostAndPort hostAndPort : masterAddrs) { + InetAddress[] inetAddrs = NetUtil.getAllInetAddresses(hostAndPort.getHost()); + if (inetAddrs != null) { + if (inetAddrs.length > 1) { + LOG.info("Specified master server address {} resolved to multiple IPs {}. " + + "Connecting to each one of them.", hostAndPort.getHost(), inetAddrs); + } + for (InetAddress addr : inetAddrs) { + masterAddrsWithNames.add( + new Pair<>(addr, new HostAndPort(addr.getHostAddress(), hostAndPort.getPort()))); + } + } else { + masterAddrsWithNames.add(new Pair<>(null, hostAndPort)); + } + } + + this.numMasters = masterAddrsWithNames.size(); + for (Pair masterPair : masterAddrsWithNames) { + InetAddress addr = masterPair.getFirst(); + HostAndPort hostAndPort = masterPair.getSecond(); + Deferred d; + if (addr != null) { + AsyncKuduClient client = masterTable.getAsyncClient(); + RpcProxy proxy = client.newMasterRpcProxy(hostAndPort, addr, credentialsPolicy); + d = connectToMaster(masterTable, proxy, parentRpc, client.getTimer(), defaultTimeoutMs); + } else { + String message = "Couldn't resolve this master's address " + hostAndPort.toString(); + LOG.warn(message); + Status statusIOE = Status.IOError(message); + d = Deferred.fromError(new NonRecoverableException(statusIOE)); + } + d.addCallbacks(callbackForNode(hostAndPort), errbackForNode(hostAndPort)); + deferreds.add(d); + } + return deferreds; + } + + /** + * Creates a callback for a ConnectToMaster RPC that was sent to 'hostAndPort'. + * @see ConnectToMasterCB + * @param hostAndPort Host and port for the RPC we're attaching this to. + * Host and port must be valid. + * @return The callback object that can be added to the RPC request. + */ + @InterfaceAudience.LimitedPrivate("Test") + Callback callbackForNode(HostAndPort hostAndPort) { + return new ConnectToMasterCB(hostAndPort); + } + + /** + * Creates an errback for a ConnectToMaster that was sent to 'hostAndPort'. + * @see ConnectToMasterErrCB + * @param hostAndPort Host and port for the RPC we're attaching this to. + * Used for debugging purposes. + * @return The errback object that can be added to the RPC request. 
+ */ + @InterfaceAudience.LimitedPrivate("Test") + Callback errbackForNode(HostAndPort hostAndPort) { + return new ConnectToMasterErrCB(hostAndPort); + } + + /** + * Checks if we've already received a response or an exception from every master that + * we've sent a ConnectToMaster to. If so -- and no leader has been found + * (that is, 'responseD' was never called) -- pass a {@link NoLeaderFoundException} + * to responseD. + */ + private void incrementCountAndCheckExhausted() { + if (countResponsesReceived.incrementAndGet() == numMasters && + responseDCalled.compareAndSet(false, true)) { + // We want `allUnrecoverable` to only be true if all the masters came back with + // NonRecoverableException so that we know for sure we can't retry anymore. Just one master + // that replies with RecoverableException or with an ok response but is a FOLLOWER is + // enough to keep us retrying. + boolean allUnrecoverable = true; + if (exceptionsReceived.size() == countResponsesReceived.get()) { + for (Exception ex : exceptionsReceived) { + if (!(ex instanceof NonRecoverableException)) { + allUnrecoverable = false; + break; + } + } + } else { + allUnrecoverable = false; + } + + String allHosts = NetUtil.hostsAndPortsToString(masterAddrs); + if (allUnrecoverable) { + // This will stop retries. + String msg = String.format("Couldn't find a valid master in (%s). " + + "Exceptions received: [%s]", allHosts, + Joiner.on(", ").join(Lists.transform( + exceptionsReceived, Functions.toStringFunction()))); + Status s = Status.ServiceUnavailable(msg); + responseD.callback(new NonRecoverableException(s)); + } else { + // We couldn't find a leader master. A common case here is that the user only + // specified a subset of the masters, so check for that. We could try to do + // something fancier like compare the actual host/ports to see if they don't + // match, but it's possible that the hostnames used by clients are not the + // same as the hostnames that the servers use for each other in some network + // setups. + + List knownMastersLocal = knownMasters.get(); + if (knownMastersLocal != null && + knownMastersLocal.size() > numMasters) { + String msg = String.format( + "Could not connect to a leader master. " + + "Client configured with %s master(s) (%s) but cluster indicates it expects " + + "%s master(s) (%s)", + numMasters, allHosts, + knownMastersLocal.size(), + ProtobufHelper.hostPortPbListToString(knownMastersLocal)); + LOG.warn(msg); + Exception e = new NonRecoverableException(Status.ConfigurationError(msg)); + if (!LOG.isDebugEnabled()) { + // Stack trace is just internal guts of netty, etc, no need for the detail + // level. + e.setStackTrace(new StackTraceElement[]{}); + } + responseD.callback(e); + return; + } + + String message = String.format("Master config (%s) has no leader.", + allHosts); + Exception ex; + if (exceptionsReceived.isEmpty()) { + LOG.warn("None of the provided masters {} is a leader; will retry", allHosts); + ex = new NoLeaderFoundException(Status.ServiceUnavailable(message)); + } else { + LOG.warn("Unable to find the leader master {}; will retry", allHosts); + String joinedMsg = message + " Exceptions received: " + + Joiner.on(",").join(Lists.transform( + exceptionsReceived, Functions.toStringFunction())); + Status s = Status.ServiceUnavailable(joinedMsg); + ex = new NoLeaderFoundException(s, + exceptionsReceived.get(exceptionsReceived.size() - 1)); + } + responseD.callback(ex); + } + } + } + + /** + * Callback for each ConnectToCluster RPC sent in connectToMaster() above. 
+ * If a request (paired to a specific master) returns a reply that indicates it's a leader, + * the callback in 'responseD' is invoked with an initialized GetTableLocationResponsePB + * object containing the leader's RPC address. + * If the master is not a leader, increment 'countResponsesReceived': if the count equals to + * the number of masters, pass {@link NoLeaderFoundException} into + * 'responseD' if no one else had called 'responseD' before; otherwise, do nothing. + */ + final class ConnectToMasterCB implements Callback { + private final HostAndPort hostAndPort; + + public ConnectToMasterCB(HostAndPort hostAndPort) { + this.hostAndPort = hostAndPort; + } + + @Override + public Void call(ConnectToMasterResponsePB r) throws Exception { + recordKnownMasters(r); + if (!r.getRole().equals(Role.LEADER)) { + incrementCountAndCheckExhausted(); + return null; + } + // We found a leader! + if (!responseDCalled.compareAndSet(false, true)) { + // Someone else already found a leader. This is somewhat unexpected + // because this means two nodes think they're the leader, but it's + // not impossible. We'll just ignore it. + LOG.debug("Callback already invoked, discarding response({}) from {}", r, hostAndPort); + return null; + } + + responseD.callback(new ConnectToClusterResponse(hostAndPort, r)); + return null; + } + + @Override + public String toString() { + return "ConnectToMasterCB for " + hostAndPort.toString(); + } + + private void recordKnownMasters(ConnectToMasterResponsePB r) { + // Old versions don't set this field. + if (r.getMasterAddrsCount() == 0) { + return; + } + + knownMasters.compareAndSet(null, r.getMasterAddrsList()); + } + } + + /** + * Errback for each ConnectToMaster RPC sent in connectToMaster() above. + * Stores each exception in 'exceptionsReceived'. Increments 'countResponseReceived': if + * the count is equal to the number of masters and no one else had called 'responseD' before, + * pass a {@link NoLeaderFoundException} into 'responseD'; otherwise, do + * nothing. + */ + final class ConnectToMasterErrCB implements Callback { + private final HostAndPort hostAndPort; + + public ConnectToMasterErrCB(HostAndPort hostAndPort) { + this.hostAndPort = hostAndPort; + } + + @Override + public Void call(Exception e) throws Exception { + LOG.info("Unable to connect to master {}: {}", hostAndPort, e.getMessage()); + exceptionsReceived.add(e); + incrementCountAndCheckExhausted(); + return null; + } + + @Override + public String toString() { + return "ConnectToMasterErrCB for " + hostAndPort.toString(); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToClusterResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToClusterResponse.java new file mode 100644 index 0000000000..9b965f718f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToClusterResponse.java @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import com.google.protobuf.ByteString; + +import org.apache.kudu.Common.PartitionPB; +import org.apache.kudu.master.Master.ConnectToMasterResponsePB; +import org.apache.kudu.master.Master.GetTableLocationsResponsePB; +import org.apache.kudu.master.Master.TSInfoPB; +import org.apache.kudu.master.Master.TabletLocationsPB; +import org.apache.kudu.master.Master.TabletLocationsPB.InternedReplicaPB; + +/** + * The aggregated response after connecting to a cluster. This stores the + * identity of the leader master as well as the response from that master. + */ +class ConnectToClusterResponse { + private static final ByteString FAKE_TABLET_ID = ByteString.copyFromUtf8( + AsyncKuduClient.MASTER_TABLE_NAME_PLACEHOLDER); + + /** + * If the client caches master locations, the entries should not live longer + * than this timeout. Defaults to one hour. + */ + private static final int CACHE_TTL_MS = 60 * 60 * 1000; + + /** The host and port of the master that is currently leader */ + private final HostAndPort leaderHostAndPort; + /** The response from that master */ + private final ConnectToMasterResponsePB connectResponse; + + public ConnectToClusterResponse(HostAndPort hostAndPort, + ConnectToMasterResponsePB connectResponse) { + super(); + this.leaderHostAndPort = hostAndPort; + this.connectResponse = connectResponse; + } + + public ConnectToMasterResponsePB getConnectResponse() { + return connectResponse; + } + + public HostAndPort getLeaderHostAndPort() { + return leaderHostAndPort; + } + + /** + * Return the location of the located leader master as if this had been a normal + * tablet lookup. This is necessary so that we can cache the master location as + * if it were a tablet. + */ + public GetTableLocationsResponsePB getAsTableLocations() { + String fakeUuid = AsyncKuduClient.getFakeMasterUuid(leaderHostAndPort); + return GetTableLocationsResponsePB.newBuilder() + .addTabletLocations(TabletLocationsPB.newBuilder() + .setPartition(PartitionPB.newBuilder() + .setPartitionKeyStart(ByteString.EMPTY) + .setPartitionKeyEnd(ByteString.EMPTY)) + .setTabletId(FAKE_TABLET_ID) + .addInternedReplicas(InternedReplicaPB.newBuilder() + .setTsInfoIdx(0) + .setRole(connectResponse.getRole()))) + .addTsInfos(TSInfoPB.newBuilder() + .addRpcAddresses(ProtobufHelper.hostAndPortToPB(leaderHostAndPort)) + .setPermanentUuid(ByteString.copyFromUtf8(fakeUuid))) + .setTtlMillis(CACHE_TTL_MS) + .build(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToMasterRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToMasterRequest.java new file mode 100644 index 0000000000..18acebfdcf --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectToMasterRequest.java @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.master.Master.GetMasterRegistrationRequestPB; +import static org.apache.kudu.master.Master.GetMasterRegistrationResponsePB; + +import java.util.Collection; +import java.util.Collections; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.master.Master.ConnectToMasterResponsePB; +import org.apache.kudu.master.Master.MasterFeatures; +import org.apache.kudu.util.Pair; + +/** + * Package-private RPC that can only go to master. + */ +@InterfaceAudience.Private +public class ConnectToMasterRequest extends KuduRpc { + /** + * Kudu 1.2 and earlier use GetMasterRegistration to connect to the master. + */ + private static final String GET_MASTER_REGISTRATION = "GetMasterRegistration"; + /** + * Kudu 1.3 and later use a new ConnectToMaster RPC, which includes less + * irrelevant information and also returns security-related items. + */ + private static final String CONNECT_TO_MASTER = "ConnectToMaster"; + + /** + * We start by trying the new RPC, but if we fail, we'll retry this + * RPC with the old RPC. + */ + private String method = CONNECT_TO_MASTER; + + public ConnectToMasterRequest(KuduTable masterTable, + Timer timer, + long timeoutMillis) { + super(masterTable, timer, timeoutMillis); + // TODO(todd): get rid of 'masterTable' hack + } + + @Override + Message createRequestPB() { + return GetMasterRegistrationRequestPB.getDefaultInstance(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return method; + } + + @Override + Collection getRequiredFeatures() { + if (CONNECT_TO_MASTER.equals(method)) { + return Collections.singleton(MasterFeatures.CONNECT_TO_MASTER.getNumber()); + } + return Collections.emptySet(); + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + if (CONNECT_TO_MASTER.equals(method)) { + return deserializeNewRpc(callResponse); + } + return deserializeOldRpc(callResponse); + } + + private Pair deserializeNewRpc( + CallResponse callResponse) { + + final ConnectToMasterResponsePB.Builder respBuilder = + ConnectToMasterResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + return new Pair( + respBuilder.build(), + respBuilder.hasError() ? respBuilder.getError() : null); + } + + private Pair deserializeOldRpc( + CallResponse callResponse) { + final GetMasterRegistrationResponsePB.Builder resp = + GetMasterRegistrationResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), resp); + + // Translate to the new RPC result type. + ConnectToMasterResponsePB.Builder b = ConnectToMasterResponsePB.newBuilder(); + if (resp.hasRole()) { + b.setRole(resp.getRole()); + } + if (resp.hasError()) { + b.setError(resp.getError()); + } + return new Pair( + b.build(), + b.hasError() ? 
b.getError() : null); + } + + public void setUseOldMethod() { + this.method = GET_MASTER_REGISTRATION; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Connection.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Connection.java new file mode 100644 index 0000000000..c495b0d5df --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Connection.java @@ -0,0 +1,847 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.net.ConnectException; +import java.nio.channels.ClosedChannelException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; +import javax.annotation.Nullable; +import javax.annotation.concurrent.GuardedBy; +import javax.net.ssl.SSLException; +import javax.net.ssl.SSLPeerUnverifiedException; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import io.netty.bootstrap.Bootstrap; +import io.netty.buffer.Unpooled; +import io.netty.channel.Channel; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.SimpleChannelInboundHandler; +import io.netty.channel.embedded.EmbeddedChannel; +import io.netty.channel.socket.SocketChannel; +import io.netty.handler.codec.LengthFieldBasedFrameDecoder; +import io.netty.handler.timeout.ReadTimeoutException; +import io.netty.handler.timeout.ReadTimeoutHandler; +import io.netty.util.concurrent.Future; +import io.netty.util.concurrent.GenericFutureListener; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.rpc.RpcHeader; +import org.apache.kudu.rpc.RpcHeader.RpcFeatureFlag; + +/** + * Class representing a connection from the client to a Kudu server (master or tablet server): + * a high-level wrapper for the TCP connection between the client and the server. + *
+ * It's a stateful handler that manages a connection to a Kudu server. + *
+ * This handler manages the RPC IDs, and keeps track of the RPCs in flight for which + * a response is currently awaited, as well as temporarily buffered RPCs that are waiting + * to be sent to the server. + *
+ * Acquiring the monitor on an object of this class will prevent it from + * accepting write requests as well as buffering requests if the underlying + * channel isn't connected. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class Connection extends SimpleChannelInboundHandler { + /** + * Authentication credentials policy for negotiating outbound connections. Some requests + * (e.g. {@link ConnectToMasterRequest}) behave differently depending on the type of credentials + * used for authentication when negotiating on the underlying connection. If some particular + * behavior is required, it's necessary to specify appropriate credentials policy while creating + * an instance of this object. + */ + public enum CredentialsPolicy { + /** It's acceptable to use authentication credentials of any type, primary or secondary ones. */ + ANY_CREDENTIALS, + + /** + * Only primary credentials are acceptable. Primary credentials are Kerberos tickets, + * TLS certificate. Secondary credentials are authentication tokens: they are 'derived' + * in the sense that it's possible to acquire them using 'primary' credentials. + */ + PRIMARY_CREDENTIALS, + } + + /** Information on the target server. */ + private final ServerInfo serverInfo; + + /** Security context to use for connection negotiation. */ + private final SecurityContext securityContext; + + /** Credentials policy to use when authenticating. */ + private final CredentialsPolicy credentialsPolicy; + + /** The Netty client bootstrap used to configure and initialize a connected channel. */ + private final Bootstrap bootstrap; + + private final String saslProtocolName; + + private final boolean requireAuthentication; + + private final boolean requireEncryption; + + private final boolean encryptLoopback; + + /** The underlying Netty's socket channel. */ + private SocketChannel channel; + + /** + * Set to true when disconnect initiated explicitly from the client side. The channelDisconnected + * event handler then knows not to log any warning about unexpected disconnection from the peer. + */ + private volatile boolean explicitlyDisconnected = false; + + /** Logger: a sink for the log messages originated from this class. */ + private static final Logger LOG = LoggerFactory.getLogger(Connection.class); + + private static final byte RPC_CURRENT_VERSION = 9; + + /** Initial header sent by the client upon connection establishment. */ + private static final byte[] CONNECTION_HEADER = new byte[]{'h', 'r', 'p', 'c', + RPC_CURRENT_VERSION, // RPC version. + 0, + 0 + }; + + private static final String NEGOTIATION_TIMEOUT_HANDLER = "negotiation-timeout-handler"; + private final long negotiationTimeoutMs; + + /** Lock to guard access to some of the fields below. */ + private final ReentrantLock lock = new ReentrantLock(); + + /** The current state of this Connection object. */ + @GuardedBy("lock") + private State state; + + /** + * A hash table to store { callId, statusReportCallback } pairs, representing messages which have + * already been sent and pending responses from the server side. Once the server responds to a + * message, the corresponding entry is removed from the container and the response callback + * is invoked with the results represented by {@link CallResponseInfo}. + */ + @GuardedBy("lock") + private HashMap> inflightMessages = new HashMap<>(); + + /** Messages enqueued while the connection was not ready to start sending them over the wire. 
*/ + @GuardedBy("lock") + private ArrayList queuedMessages = Lists.newArrayList(); + + /** The result of the successful connection negotiation. */ + @GuardedBy("lock") + private Negotiator.Success negotiationResult = null; + + /** The result of failed connection negotiation. */ + @GuardedBy("lock") + private Negotiator.Failure negotiationFailure = null; + + /** A monotonically increasing counter for RPC IDs. */ + @GuardedBy("lock") + private int nextCallId = 0; + + /** The future for the connection attempt. Set only once connect() is called. */ + @Nullable + @GuardedBy("lock") + private ChannelFuture connectFuture; + + /** + * Create a new Connection object to the specified destination. + * + * @param serverInfo the destination server + * @param securityContext security context to use for connection negotiation + * @param bootstrap Netty bootstrap to create corresponding Netty channel + * @param credentialsPolicy policy controlling which credentials to use while negotiating on the + * connection to the target server: + * if {@link CredentialsPolicy#PRIMARY_CREDENTIALS}, the authentication + * @param saslProtocolName SASL protocol name used when connecting to secure + * clusters. Must match the servers' service principal name. + */ + Connection(ServerInfo serverInfo, + SecurityContext securityContext, + Bootstrap bootstrap, + CredentialsPolicy credentialsPolicy, + String saslProtocolName, + boolean requireAuthentication, + boolean requireEncryption, + boolean encryptLoopback, + long negotiationTimeoutMs) { + this.serverInfo = serverInfo; + this.securityContext = securityContext; + this.saslProtocolName = saslProtocolName; + this.state = State.NEW; + this.credentialsPolicy = credentialsPolicy; + this.bootstrap = bootstrap.clone(); + this.bootstrap.handler(new ConnectionChannelInitializer()); + this.requireAuthentication = requireAuthentication; + this.requireEncryption = requireEncryption; + this.encryptLoopback = encryptLoopback; + this.negotiationTimeoutMs = negotiationTimeoutMs; + } + + /** {@inheritDoc} */ + @Override + @SuppressWarnings("FutureReturnValueIgnored") + public void channelActive(final ChannelHandlerContext ctx) { + lock.lock(); + try { + if (state == State.TERMINATED) { + return; + } + Preconditions.checkState(state == State.CONNECTING); + state = State.NEGOTIATING; + } finally { + lock.unlock(); + } + ctx.writeAndFlush(Unpooled.wrappedBuffer(CONNECTION_HEADER), ctx.voidPromise()); + Negotiator negotiator = new Negotiator(serverInfo.getAndCanonicalizeHostname(), securityContext, + (credentialsPolicy == CredentialsPolicy.PRIMARY_CREDENTIALS), saslProtocolName, + requireAuthentication, requireEncryption, encryptLoopback); + ctx.pipeline().addBefore(ctx.name(), "negotiation", negotiator); + negotiator.sendHello(ctx); + } + + /** {@inheritDoc} */ + @Override + public void channelInactive(final ChannelHandlerContext ctx) { + LOG.debug("{} handling channelInactive", getLogPrefix()); + String msg = "connection closed"; + // Connection failures are reported as channelClosed() before exceptionCaught() is called. + // We can detect this case by looking at whether connectFuture has been marked complete + // and grabbing the exception from there. + lock.lock(); + try { + if (connectFuture != null && connectFuture.cause() != null) { + msg = connectFuture.cause().toString(); + } + } finally { + lock.unlock(); + } + // No need to call super.channelInactive(ctx, e) -- there should be nobody in the upstream + // pipeline after Connection itself. So, just handle the close event ourselves. 
+ cleanup(new RecoverableException(Status.NetworkError(msg))); + } + + /** {@inheritDoc} */ + @Override + @SuppressWarnings("FutureReturnValueIgnored") + public void channelRead0(ChannelHandlerContext ctx, Object m) throws Exception { + // Process the results of a successful negotiation. + if (m instanceof Negotiator.Success) { + lock.lock(); + try { + negotiationResult = (Negotiator.Success) m; + Preconditions.checkState(state == State.TERMINATED || inflightMessages.isEmpty()); + + // Before switching to the READY state, it's necessary to empty the queuedMessages. There + // might be concurrent activity on adding new messages into the queue if enqueueMessage() + // is called in the middle. + while (state != State.TERMINATED && !queuedMessages.isEmpty()) { + + // Register the messages into the inflightMessages before sending them to the wire. This + // is to be able to invoke appropriate callback when the response received. This should + // be done under the lock since the inflightMessages itself does not provide any + // concurrency guarantees. + List queued = queuedMessages; + for (final QueuedMessage qm : queued) { + Callback empty = inflightMessages.put( + qm.message.getHeaderBuilder().getCallId(), qm.cb); + Preconditions.checkState(empty == null); + } + queuedMessages = Lists.newArrayList(); + + lock.unlock(); + try { + // Send out the enqueued messages while not holding the lock. This is to avoid + // deadlock if channelDisconnected/channelClosed event happens and cleanup() is called. + for (final QueuedMessage qm : queued) { + sendCallToWire(qm.message); + } + } finally { + lock.lock(); + } + } + // The connection may have been terminated while the lock was dropped. + if (state == State.TERMINATED) { + return; + } + + Preconditions.checkState(state == State.NEGOTIATING); + + queuedMessages = null; + + // Drop the negotiation timeout handler from the pipeline. + ctx.pipeline().remove(NEGOTIATION_TIMEOUT_HANDLER); + + // Set the state to READY -- that means the incoming messages should be no longer put into + // the queuedMessages, but sent to wire right away (see the enqueueMessage() for details). + state = State.READY; + } finally { + lock.unlock(); + } + return; + } + + // Process the results of a failed negotiation. + if (m instanceof Negotiator.Failure) { + lock.lock(); + try { + if (state == State.TERMINATED) { + return; + } + Preconditions.checkState(state == State.NEGOTIATING); + Preconditions.checkState(inflightMessages.isEmpty()); + + state = State.NEGOTIATION_FAILED; + negotiationFailure = (Negotiator.Failure) m; + } finally { + lock.unlock(); + } + // Calling close() triggers the cleanup() which will handle the negotiation + // failure appropriately. + ctx.close(); + return; + } + + // Some other event which the connection does not handle. 
+ if (!(m instanceof CallResponse)) { + ctx.fireChannelRead(m); + return; + } + + final CallResponse response = (CallResponse) m; + final RpcHeader.ResponseHeader header = response.getHeader(); + if (!header.hasCallId()) { + final int size = response.getTotalResponseSize(); + final String msg = getLogPrefix() + + " RPC response (size: " + size + ") doesn't" + " have callID: " + header; + LOG.error(msg); + throw new NonRecoverableException(Status.Incomplete(msg)); + } + + final int callId = header.getCallId(); + Callback responseCbk; + lock.lock(); + try { + if (state == State.TERMINATED) { + return; + } + Preconditions.checkState(state == State.READY); + responseCbk = inflightMessages.remove(callId); + } finally { + lock.unlock(); + } + + if (responseCbk == null) { + final String msg = getLogPrefix() + " invalid callID: " + callId; + LOG.error(msg); + // If we get a bad RPC ID back, we are probably somehow misaligned from + // the server. So, we disconnect the connection. + throw new NonRecoverableException(Status.IllegalState(msg)); + } + + if (!header.hasIsError() || !header.getIsError()) { + // The success case. + responseCbk.call(new CallResponseInfo(response, null)); + return; + } + + final RpcHeader.ErrorStatusPB.Builder errorBuilder = RpcHeader.ErrorStatusPB.newBuilder(); + KuduRpc.readProtobuf(response.getPBMessage(), errorBuilder); + final RpcHeader.ErrorStatusPB error = errorBuilder.build(); + RpcHeader.ErrorStatusPB.RpcErrorCodePB code = error.getCode(); + if (code.equals(RpcHeader.ErrorStatusPB.RpcErrorCodePB.ERROR_SERVER_TOO_BUSY) || + code.equals(RpcHeader.ErrorStatusPB.RpcErrorCodePB.ERROR_UNAVAILABLE)) { + responseCbk.call(new CallResponseInfo( + response, new RecoverableException(Status.ServiceUnavailable(error.getMessage())))); + return; + } + if (code.equals(RpcHeader.ErrorStatusPB.RpcErrorCodePB.ERROR_INVALID_AUTHORIZATION_TOKEN)) { + responseCbk.call(new CallResponseInfo( + response, new InvalidAuthzTokenException(Status.NotAuthorized(error.getMessage())))); + return; + } + + final String message = getLogPrefix() + " server sent error " + error.getMessage(); + LOG.error(message); // can be useful + responseCbk.call(new CallResponseInfo( + response, new RpcRemoteException(Status.RemoteError(message), error))); + } + + /** {@inheritDoc} */ + @Override + @SuppressWarnings("FutureReturnValueIgnored") + public void exceptionCaught(ChannelHandlerContext ctx, Throwable e) throws Exception { + KuduException error; + if (e instanceof KuduException) { + error = (KuduException) e; + } else if (e instanceof RejectedExecutionException) { + String message = String.format("%s RPC rejected by the executor (ignore if shutting down)", + getLogPrefix()); + error = new RecoverableException(Status.NetworkError(message), e); + LOG.warn(message, e); + } else if (e instanceof ReadTimeoutException) { + String message = String.format("%s encountered a read timeout; closing the channel", + getLogPrefix()); + error = new RecoverableException(Status.NetworkError(message), e); + LOG.debug(message); + } else if (e instanceof ClosedChannelException) { + String message = String.format( + explicitlyDisconnected ? 
"%s disconnected from peer" : "%s lost connection to peer", + getLogPrefix()); + error = new RecoverableException(Status.NetworkError(message), e); + LOG.info(message); + } else if (e instanceof ConnectException) { + String message = "Failed to connect to peer " + serverInfo + ": " + e.getMessage(); + error = new RecoverableException(Status.NetworkError(message), e); + LOG.info(message); + } else if (e instanceof SSLException && explicitlyDisconnected) { + // There's a race in Netty where, when we call Channel.close(), it tries + // to send a TLS 'shutdown' message and enters a shutdown state. If another + // thread races to send actual data on the channel, then Netty will get a + // bit confused that we are trying to send data and misinterpret it as a + // renegotiation attempt, and throw an SSLException. So, we just ignore any + // SSLException if we've already attempted to close, otherwise log the error. + error = new RecoverableException(Status.NetworkError( + String.format("%s disconnected from peer", getLogPrefix()))); + } else if (e instanceof SSLPeerUnverifiedException) { + String m = String.format("unable to verify identity of peer %s: %s", + serverInfo, e.getMessage()); + error = new NonRecoverableException(Status.NetworkError(m), e); + LOG.error(m, e); + } else { + // If the connection was explicitly disconnected via a call to disconnect(), we should + // have either gotten a ClosedChannelException or an SSLException. + assert !explicitlyDisconnected; + String channelInfo = ctx == null ? "" : String.format(" on %s", ctx.channel()); + String message = String.format( + "%s unexpected exception from downstream%s", getLogPrefix(), channelInfo); + error = new RecoverableException(Status.NetworkError(message), e); + LOG.error(message, e); + } + + cleanup(error); + // `ctx` is null when `exceptionCaught` is called from the `connectFuture` + // listener in `connect()`. + if (ctx != null) { + ctx.close(); + } + } + + /** Getter for the peer's end-point information */ + public ServerInfo getServerInfo() { + return serverInfo; + } + + /** The credentials policy used for the connection negotiation. */ + CredentialsPolicy getCredentialsPolicy() { + return credentialsPolicy; + } + + /** @return true iff the connection is in the TERMINATED state */ + boolean isTerminated() { + lock.lock(); + try { + return state == State.TERMINATED; + } finally { + lock.unlock(); + } + } + + /** + * TODO(aserbin) make it possible to avoid calling this when the server features are not known yet + * + * @return the set of server's features, if known; null otherwise + */ + @Nullable + Set getPeerFeatures() { + Set features = null; + lock.lock(); + try { + if (negotiationResult != null) { + features = negotiationResult.serverFeatures; + } + } finally { + lock.unlock(); + } + return features; + } + + /** @return string representation of the peer information suitable for logging */ + String getLogPrefix() { + return "[peer " + serverInfo + "]"; + } + + /** + * Enqueue outbound message for sending to the remote server via Kudu RPC. The enqueueMessage() + * accepts messages even if the connection hasn't yet been established: the enqueued messages + * are sent out as soon as the connection to the server is ready. The connection is initiated upon + * enqueuing the very first outbound message. 
+ */ + void enqueueMessage(RpcOutboundMessage msg, Callback cb) + throws RecoverableException { + lock.lock(); + try { + if (state == State.TERMINATED) { + // The upper-level caller should handle the exception and retry using a new connection. + throw new RecoverableException(Status.IllegalState("connection is terminated")); + } + + if (state == State.NEW) { + // Schedule connecting to the server. + connect(); + } + + // Set the call identifier for the outgoing RPC. + final int callId = nextCallId++; + RpcHeader.RequestHeader.Builder headerBuilder = msg.getHeaderBuilder(); + headerBuilder.setCallId(callId); + + // Amend the timeout for the call, if necessary. + final int timeoutMs = headerBuilder.getTimeoutMillis(); + if (timeoutMs > 0) { + headerBuilder.setTimeoutMillis(timeoutMs); + } + + // If the connection hasn't been negotiated yet, add the message into the queuedMessages list. + // The elements of the queuedMessages list will be processed when the negotiation either + // succeeds or fails. + if (state != State.READY) { + queuedMessages.add(new QueuedMessage(msg, cb)); + return; + } + + assert state == State.READY; + // Register the message into the inflightMessages before sending it to the wire. + final Callback empty = inflightMessages.put(callId, cb); + Preconditions.checkState(empty == null); + } finally { + lock.unlock(); + } + + // It's time to initiate sending the message over the wire. This is done outside of the lock + // to prevent deadlocks due to the reverse order of locking while working with Connection.lock + // and the lower-level Netty locks. The other order of taking those two locks could happen + // upon receiving ChannelDisconnected or ChannelClosed events. Upon receiving those events, + // the low-level Netty lock is held and the channelDisconnected()/channelClosed() methods + // would call the cleanup() method. In its turn, the cleanup() method tries to acquire the + // Connection.lock lock, while the low-level Netty lock might be already acquired. + // + // More details and an example of a stack trace is available in KUDU-1894 comments. + sendCallToWire(msg); + } + + /** + * Triggers the channel to be disconnected, which will asynchronously cause all + * queued and in-flight RPCs to be failed. This method is idempotent. + * + * @return future object to wait on the disconnect completion, if necessary + */ + ChannelFuture disconnect() { + lock.lock(); + try { + LOG.debug("{} disconnecting while in state {}", getLogPrefix(), state); + explicitlyDisconnected = true; + // No connection has been made yet. + if (state == State.NEW) { + // Use an EmbeddedChannel to return a valid and immediately completed ChannelFuture. + return new EmbeddedChannel().disconnect(); + } else { + return connectFuture.channel().disconnect(); + } + } finally { + lock.unlock(); + } + } + + /** + * If open, forcefully shut down the connection to the server. This is the same as + * {@link #disconnect}, but it returns Deferred instead of ChannelFuture. + * + * @return deferred object for tracking the shutting down of this connection + */ + Deferred shutdown() { + final ChannelFuture disconnectFuture = disconnect(); + final Deferred d = new Deferred<>(); + disconnectFuture.addListener(new ShutdownListener(d)); + return d; + } + + /** + * A ChannelFutureListener that completes the passed deferred on completion. 
+ */ + private static class ShutdownListener implements ChannelFutureListener { + + private final Deferred deferred; + + public ShutdownListener(Deferred deferred) { + this.deferred = deferred; + } + + @Override + public void operationComplete(final ChannelFuture future) { + if (future.isSuccess()) { + deferred.callback(null); + return; + } + final Throwable t = future.cause(); + if (t instanceof Exception) { + deferred.callback(t); + } else { + deferred.callback(new NonRecoverableException( + Status.IllegalState("failed to shutdown: " + this), t)); + } + } + } + + /** @return string representation of this object (suitable for printing into the logs, etc.) */ + @Override + public String toString() { + final StringBuilder buf = new StringBuilder(); + buf.append("Connection@") + .append(hashCode()) + .append("(channel=") + .append(channel) + .append(", uuid=") + .append(serverInfo.getUuid()); + int queuedMessagesNum = 0; + int inflightMessagesNum = 0; + lock.lock(); + try { + queuedMessagesNum = queuedMessages == null ? 0 : queuedMessages.size(); + inflightMessagesNum = inflightMessages == null ? 0 : inflightMessages.size(); + } finally { + lock.unlock(); + } + buf.append(", #queued=").append(queuedMessagesNum) + .append(", #inflight=").append(inflightMessagesNum) + .append(")"); + return buf.toString(); + } + + /** + * This is test-only method. + * + * @return true iff the connection is in the READY state + */ + @InterfaceAudience.LimitedPrivate("Test") + boolean isReady() { + lock.lock(); + try { + return state == State.READY; + } finally { + lock.unlock(); + } + } + + /** + * Start sending the message to the server over the wire. It's crucial to not hold the lock + * while doing so: see enqueueMessage() and KUDU-1894 for details. + */ + @SuppressWarnings("FutureReturnValueIgnored") + private void sendCallToWire(final RpcOutboundMessage msg) { + assert !lock.isHeldByCurrentThread(); + if (LOG.isTraceEnabled()) { + LOG.trace("{} sending {}", getLogPrefix(), msg); + } + channel.writeAndFlush(msg, channel.voidPromise()); + } + + /** + * Process the fact that the connection has been disconnected: update the state of this object and + * clean up any outstanding or lingering messages, notifying on the error via their status + * callbacks. The callee is supposed to handle the error and retry sending the messages, + * if needed. + * + * @param error the exception which caused the connection cleanup + */ + private void cleanup(KuduException error) { + List queued; + Map> inflight; + + boolean needNewAuthnToken = false; + lock.lock(); + try { + if (state == State.TERMINATED) { + // The cleanup has already run. 
+ Preconditions.checkState(queuedMessages == null); + Preconditions.checkState(inflightMessages == null); + return; + } + if (state == State.NEGOTIATION_FAILED) { + Preconditions.checkState(negotiationFailure != null); + Preconditions.checkState(inflightMessages.isEmpty()); + needNewAuthnToken = negotiationFailure.status.getCode().equals( + RpcHeader.ErrorStatusPB.RpcErrorCodePB.FATAL_INVALID_AUTHENTICATION_TOKEN); + } + LOG.debug("{} cleaning up while in state {} due to: {}", + getLogPrefix(), state, error.getMessage()); + + queued = queuedMessages; + queuedMessages = null; + + inflight = inflightMessages; + inflightMessages = null; + + state = State.TERMINATED; + } finally { + lock.unlock(); + } + if (needNewAuthnToken) { + error = new InvalidAuthnTokenException(error.getStatus()); + } + + for (Callback cb : inflight.values()) { + try { + cb.call(new CallResponseInfo(null, error)); + } catch (Exception e) { + LOG.warn("{} exception while aborting in-flight call: {}", getLogPrefix(), e); + } + } + + if (queued != null) { + for (QueuedMessage qm : queued) { + try { + qm.cb.call(new CallResponseInfo(null, error)); + } catch (Exception e) { + LOG.warn("{} exception while aborting enqueued call: {}", getLogPrefix(), e); + } + } + } + } + + /** Initiate opening TCP connection to the server. */ + @GuardedBy("lock") + private void connect() { + LOG.debug("{} connecting to peer", getLogPrefix()); + Preconditions.checkState(lock.isHeldByCurrentThread()); + Preconditions.checkState(state == State.NEW); + state = State.CONNECTING; + connectFuture = bootstrap.connect(serverInfo.getResolvedAddress()); + connectFuture.addListener(new GenericFutureListener>() { + @Override + public void operationComplete(Future future) throws Exception { + if (future.isSuccess()) { + LOG.debug("{} Successfully connected to peer", getLogPrefix()); + return; + } + // If the connection failed, pass the exception to exceptionCaught to be handled. + final Throwable t = future.cause(); + exceptionCaught(null, t); + } + }); + channel = (SocketChannel) connectFuture.channel(); + } + + /** Enumeration to represent the internal state of the Connection object. */ + private enum State { + /** The object has just been created. */ + NEW, + + /** The establishment of TCP connection to the server has started. */ + CONNECTING, + + /** The connection negotiation has started. */ + NEGOTIATING, + + /** + * The underlying TCP connection has been dropped off due to negotiation error and there are + * enqueued messages to handle. Once connection negotiation fails, the Connection object + * handles the affected queued RPCs appropriately. If the negotiation failed due to invalid + * authn token error, the upper-level code may attempt to acquire a new authentication token + * in that case. The connection transitions into the TERMINATED state upon notifying the + * affected RPCs on the connection negotiation failure. + */ + NEGOTIATION_FAILED, + + /** The connection to the server is opened, negotiated, and ready to use. */ + READY, + + /** + * The TCP connection has been dropped off, the proper clean-up procedure has run and no queued + * nor in-flight messages are left. In this state, the object does not accept new messages, + * throwing RecoverableException upon call of the enqueueMessage() method. + */ + TERMINATED, + } + + /** + * The class to represent RPC response received from the remote server. + * If the {@code exception} is null, then it's a success case and the {@code response} contains + * the information on the response. 
Otherwise it's an error and the {@code exception} provides + * information on the error. For the recoverable error case, the {@code exception} is of + * {@link RecoverableException} type, otherwise it's of {@link NonRecoverableException} type. + */ + static final class CallResponseInfo { + public final CallResponse response; + public final KuduException exception; + + CallResponseInfo(CallResponse response, KuduException exception) { + this.response = response; + this.exception = exception; + } + } + + /** Internal class representing an enqueued outgoing message. */ + private static final class QueuedMessage { + private final RpcOutboundMessage message; + private final Callback cb; + + QueuedMessage(RpcOutboundMessage message, Callback cb) { + this.message = message; + this.cb = cb; + } + } + + private final class ConnectionChannelInitializer extends ChannelInitializer { + @Override + public void initChannel(Channel ch) throws Exception { + ChannelPipeline pipeline = ch.pipeline(); + pipeline.addFirst("decode-frames", new LengthFieldBasedFrameDecoder( + KuduRpc.MAX_RPC_SIZE, + 0, // length comes at offset 0 + 4, // length prefix is 4 bytes long + 0, // no "length adjustment" + 4 /* strip the length prefix */)); + pipeline.addLast("decode-inbound", new CallResponse.Decoder()); + pipeline.addLast("encode-outbound", new RpcOutboundMessage.Encoder()); + // Add a socket read timeout handler to function as a timeout for negotiation. + // The handler will be removed once the connection is negotiated. + pipeline.addLast(NEGOTIATION_TIMEOUT_HANDLER, + new ReadTimeoutHandler(negotiationTimeoutMs, TimeUnit.MILLISECONDS)); + pipeline.addLast("kudu-handler", Connection.this); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectionCache.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectionCache.java new file mode 100644 index 0000000000..17fac9693e --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ConnectionCache.java @@ -0,0 +1,185 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import javax.annotation.concurrent.GuardedBy; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.ImmutableList; +import com.stumbleupon.async.Deferred; +import io.netty.bootstrap.Bootstrap; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * The ConnectionCache is responsible for managing connections to Kudu masters and tablet servers. 
+ * There should only be one instance of ConnectionCache per Kudu client, and it should not be + * shared between clients. + *
+ * Disconnected instances of the {@link Connection} class are replaced in the cache with new ones + * when {@link #getConnection(ServerInfo, Connection.CredentialsPolicy)} method is called with the + * same destination and matching credentials policy. Since the map is keyed by the address of the + * target server, the theoretical maximum number of elements in the cache is twice the number of + * all servers in the cluster (i.e. both masters and tablet servers). However, in practice it's + * 2 * number of masters + number of tablet servers since tablet servers do not require connections + * negotiated with primary credentials. + * + * This class is thread-safe. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class ConnectionCache { + + /** Security context to use for connection negotiation. */ + private final SecurityContext securityContext; + + /** Netty's bootstrap to use by connections. */ + private final Bootstrap bootstrap; + + private final String saslProtocolName; + + private boolean requireAuthentication; + + private boolean requireEncryption; + + private boolean encryptLoopback; + + private long negotiationTimeoutMs; + + /** + * Container mapping server IP/port into the established connection from the client to the + * server. It may be up to two connections per server: one established with secondary + * credentials (e.g. authn token), another with primary ones (e.g. Kerberos credentials). + */ + @GuardedBy("connsByAddress") + private final HashMultimap connsByAddress = + HashMultimap.create(); + + /** Create a new empty ConnectionCache given the specified parameters. */ + ConnectionCache(SecurityContext securityContext, + Bootstrap bootstrap, + String saslProtocolName, + boolean requireAuthentication, + boolean requireEncryption, + boolean encryptLoopback, + long negotiationTimeoutMs) { + this.securityContext = securityContext; + this.bootstrap = bootstrap; + this.saslProtocolName = saslProtocolName; + this.requireAuthentication = requireAuthentication; + this.requireEncryption = requireEncryption; + this.encryptLoopback = encryptLoopback; + this.negotiationTimeoutMs = negotiationTimeoutMs; + } + + /** + * Get connection to the specified server. If no connection exists or the existing connection + * is already disconnected, then create a new connection to the specified server. The newly + * created connection is not negotiated until enqueuing the first RPC to the target server. + * + * @param serverInfo the server end-point to connect to + * @param credentialsPolicy authentication credentials policy for the connection negotiation + * @return instance of this object with the specified destination + */ + public Connection getConnection(final ServerInfo serverInfo, + Connection.CredentialsPolicy credentialsPolicy) { + Connection result = null; + synchronized (connsByAddress) { + // Create and register a new connection object into the cache if one of the following is true: + // + // * There isn't a registered connection to the specified destination. + // + // * There is a connection to the specified destination, but it's in TERMINATED state. + // Such connections cannot be used again and should be recycled. The connection cache + // lazily removes such entries. + // + // * A connection negotiated with primary credentials is requested but the only registered + // one does not have such property. In this case, the already existing connection + // (negotiated with secondary credentials, i.e. 
authn token) is kept in the cache and + // a new one is created to be open and negotiated with primary credentials. The newly + // created connection is put into the cache along with old one. We don't do anything + // special to the old connection to shut it down since it may be still in use. We rely + // on the server to close inactive connections in accordance with their TTL settings. + // + final Set connections = connsByAddress.get(serverInfo.getResolvedAddress()); + Iterator it = connections.iterator(); + while (it.hasNext()) { + Connection c = it.next(); + if (c.isTerminated()) { + // Lazy recycling of the terminated connections: removing them from the cache upon + // an attempt to connect to the same destination again. + it.remove(); + continue; + } + if (credentialsPolicy == Connection.CredentialsPolicy.ANY_CREDENTIALS || + credentialsPolicy == c.getCredentialsPolicy()) { + // If the connection policy allows for using any credentials or the connection is + // negotiated using the given credentials type, this is the connection we are looking for. + result = c; + } + } + if (result == null) { + result = new Connection(serverInfo, + securityContext, + bootstrap, + credentialsPolicy, + saslProtocolName, + requireAuthentication, + requireEncryption, + encryptLoopback, + negotiationTimeoutMs); + connections.add(result); + // There can be at most 2 connections to the same destination: one with primary and another + // with secondary credentials. + assert connections.size() <= 2; + } + } + + return result; + } + + /** Asynchronously terminate every connection. This cancels all the pending and in-flight RPCs. */ + Deferred> disconnectEverything() { + synchronized (connsByAddress) { + List> deferreds = new ArrayList<>(connsByAddress.size()); + for (Connection c : connsByAddress.values()) { + deferreds.add(c.shutdown()); + } + return Deferred.group(deferreds); + } + } + + /** + * Return a copy of the all-connections-list. This method is exposed only to allow + * {@link AsyncKuduClient} to forward it, so tests could get access to the underlying elements + * of the cache. + * + * @return a copy of the list of all connections in the connection cache + */ + @InterfaceAudience.LimitedPrivate("Test") + List getConnectionListCopy() { + synchronized (connsByAddress) { + return ImmutableList.copyOf(connsByAddress.values()); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableOptions.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableOptions.java new file mode 100644 index 0000000000..d577a590fd --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableOptions.java @@ -0,0 +1,362 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
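The per-connection settings carried by Connection and ConnectionCache above (SASL protocol name, whether authentication is required, the negotiation timeout) are not set by instantiating these package-private classes; they are plumbed down from the client builder. The following is a minimal sketch against the public API, assuming a secured cluster behind the placeholder master address; the builder option names are assumptions and may vary slightly between client versions.

import org.apache.kudu.client.KuduClient;

public class SecureClientSketch {
  public static void main(String[] args) throws Exception {
    // "master-1.example.com:7051" is a placeholder master address.
    try (KuduClient client = new KuduClient.KuduClientBuilder("master-1.example.com:7051")
        .saslProtocolName("kudu")              // must match the servers' service principal name
        .requireAuthentication(true)           // refuse to fall back to unauthenticated connections
        .defaultAdminOperationTimeoutMs(30000) // bound admin RPCs such as listing tables
        .build()) {
      // The first RPC lazily creates a Connection through the ConnectionCache and
      // negotiates it before the call goes out on the wire.
      System.out.println(client.getTablesList().getTablesList());
    }
  }
}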
+ +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.master.Master; + +/** + * This is a builder class for all the options that can be provided while creating a table. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class CreateTableOptions { + + private final List splitRows = Lists.newArrayList(); + private final List rangePartitions = Lists.newArrayList(); + private final List customRangePartitions = + Lists.newArrayList(); // range partitions with custom hash schemas + private Master.CreateTableRequestPB.Builder pb = Master.CreateTableRequestPB.newBuilder(); + private boolean wait = true; + private boolean isPbGenerationDone = false; + + /** + * Add a set of hash partitions to the table. + * + * Each column must be a part of the table's primary key, and an individual + * column may only appear in a single hash component. + * + * For each set of hash partitions added to the table, the total number of + * table partitions is multiplied by the number of buckets. For example, if a + * table is created with 3 split rows, and two hash partitions with 4 and 5 + * buckets respectively, the total number of table partitions will be 80 + * (4 range partitions * 4 hash buckets * 5 hash buckets). + * + * @param columns the columns to hash + * @param buckets the number of buckets to hash into + * @return this instance + */ + public CreateTableOptions addHashPartitions(List columns, int buckets) { + addHashPartitions(columns, buckets, 0); + return this; + } + + /** + * Add a set of hash partitions to the table. + * + * This constructor takes a seed value, which can be used to randomize the + * mapping of rows to hash buckets. Setting the seed may provide some + * amount of protection against denial of service attacks when the hashed + * columns contain user provided values. + * + * @param columns the columns to hash + * @param buckets the number of buckets to hash into + * @param seed a hash seed + * @return this instance + */ + public CreateTableOptions addHashPartitions(List columns, int buckets, int seed) { + Common.PartitionSchemaPB.HashBucketSchemaPB.Builder hashBucket = + pb.getPartitionSchemaBuilder().addHashSchemaBuilder(); + for (String column : columns) { + hashBucket.addColumnsBuilder().setName(column); + } + hashBucket.setNumBuckets(buckets); + hashBucket.setSeed(seed); + return this; + } + + /** + * Set the columns on which the table will be range-partitioned. + * + * Every column must be a part of the table's primary key. If not set, + * the table is range partitioned by the primary key columns with a single + * unbounded partition. If called with an empty vector, the table will be + * created without range partitioning. + * + * Tables must be created with either range, hash, or range and hash + * partitioning. To force the use of a single tablet (not recommended), + * call this method with an empty list and set no split rows and no hash + * partitions. 
+ * + * @param columns the range partitioned columns + * @return this instance + */ + public CreateTableOptions setRangePartitionColumns(List columns) { + Common.PartitionSchemaPB.RangeSchemaPB.Builder rangePartition = + pb.getPartitionSchemaBuilder().getRangeSchemaBuilder(); + for (String column : columns) { + rangePartition.addColumnsBuilder().setName(column); + } + return this; + } + + /** + * Add a range partition to the table with an inclusive lower bound and an + * exclusive upper bound. + * + * If either row is empty, then that end of the range will be unbounded. If a + * range column is missing a value, the logical minimum value for that column + * type will be used as the default. + * + * Multiple range bounds may be added, but they must not overlap. All split + * rows must fall in one of the range bounds. The lower bound must be less + * than the upper bound. + * + * If not provided, the table's range will be unbounded. + * + * @param lower the inclusive lower bound + * @param upper the exclusive upper bound + * @return this instance + */ + public CreateTableOptions addRangePartition(PartialRow lower, + PartialRow upper) { + return addRangePartition(lower, upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + } + + /** + * Add a range partition partition to the table with a lower bound and upper + * bound. + * + * If either row is empty, then that end of the range will be unbounded. If a + * range column is missing a value, the logical minimum value for that column + * type will be used as the default. + * + * Multiple range bounds may be added, but they must not overlap. All split + * rows must fall in one of the range bounds. The lower bound must be less + * than the upper bound. + * + * If not provided, the table's range will be unbounded. + * + * @param lower the lower bound + * @param upper the upper bound + * @param lowerBoundType the type of the lower bound, either inclusive or exclusive + * @param upperBoundType the type of the upper bound, either inclusive or exclusive + * @return this instance + */ + public CreateTableOptions addRangePartition(PartialRow lower, + PartialRow upper, + RangePartitionBound lowerBoundType, + RangePartitionBound upperBoundType) { + rangePartitions.add(new RangePartition(lower, upper, lowerBoundType, upperBoundType)); + return this; + } + + /** + * Add range partition with custom hash schema. + * + * @param rangePartition range partition with custom hash schema + * @return this CreateTableOptions object modified accordingly + */ + public CreateTableOptions addRangePartition(RangePartitionWithCustomHashSchema rangePartition) { + if (!splitRows.isEmpty()) { + throw new IllegalArgumentException( + "no range partitions with custom hash schema are allowed when using " + + "split rows to define range partitioning for a table"); + } + customRangePartitions.add(rangePartition); + pb.getPartitionSchemaBuilder().addCustomHashSchemaRanges(rangePartition.toPB()); + return this; + } + + /** + * Add a range partition split. The split row must fall in a range partition, + * and causes the range partition to split into two contiguous range partitions. + * The row may be reused or modified safely after this call without changing + * the split point. 
+ * + * @param row a key row for the split point + * @return this instance + */ + public CreateTableOptions addSplitRow(PartialRow row) { + if (!customRangePartitions.isEmpty()) { + throw new IllegalArgumentException( + "no split rows are allowed to define range partitioning for a table " + + "when range partitions with custom hash schema are present"); + } + splitRows.add(new PartialRow(row)); + return this; + } + + /** + * Sets the number of replicas that each tablet will have. If not specified, it uses the + * server-side default which is usually 3 unless changed by an administrator. + * + * @param numReplicas the number of replicas to use + * @return this instance + */ + public CreateTableOptions setNumReplicas(int numReplicas) { + pb.setNumReplicas(numReplicas); + return this; + } + + /** + * Sets the dimension label for all tablets created at table creation time. + * + * By default, the master will try to place newly created tablet replicas on tablet + * servers with a small number of tablet replicas. If the dimension label is provided, + * newly created replicas will be evenly distributed in the cluster based on the dimension + * label. In other words, the master will try to place newly created tablet replicas on + * tablet servers with a small number of tablet replicas belonging to this dimension label. + * + * @param dimensionLabel the dimension label for the tablet to be created. + * @return this instance + */ + public CreateTableOptions setDimensionLabel(String dimensionLabel) { + Preconditions.checkArgument(dimensionLabel != null, + "dimension label must not be null"); + pb.setDimensionLabel(dimensionLabel); + return this; + } + + /** + * Sets the table's extra configuration properties. + * + * If the value of the kv pair is empty, the property will be ignored. + * + * @param extraConfig the table's extra configuration properties + * @return this instance + */ + public CreateTableOptions setExtraConfigs(Map extraConfig) { + pb.putAllExtraConfigs(extraConfig); + return this; + } + + /** + * Whether to wait for the table to be fully created before this create + * operation is considered to be finished. + *
+ * If false, the create will finish quickly, but subsequent row operations + * may take longer as they may need to wait for portions of the table to be + * fully created. + *
+ * If true, the create will take longer, but the speed of subsequent row + * operations will not be impacted. + *
+ * If not provided, defaults to true. + *
+ * @param wait whether to wait for the table to be fully created + * @return this instance + */ + public CreateTableOptions setWait(boolean wait) { + this.wait = wait; + return this; + } + + /** + * Set the table owner as the provided username. + * Overrides the default of the currently logged-in username or Kerberos principal. + * + * This is an unstable method because it is not yet clear whether this should + * be supported directly in the long run, rather than requiring the table creator + * to re-assign ownership explicitly. + * + * @param owner the username to set as the table owner. + * @return this instance + */ + public CreateTableOptions setOwner(String owner) { + pb.setOwner(owner); + return this; + } + + /** + * Set the table comment. + * + * @param comment the table comment + * @return this instance + */ + public CreateTableOptions setComment(String comment) { + pb.setComment(comment); + return this; + } + + Master.CreateTableRequestPB.Builder getBuilder() { + if (isPbGenerationDone) { + return pb; + } + + if (!splitRows.isEmpty() && !customRangePartitions.isEmpty()) { + throw new IllegalArgumentException( + "no split rows are allowed to define range partitioning for a table " + + "when range partitions with custom hash schema are present"); + } + if (customRangePartitions.isEmpty()) { + if (!splitRows.isEmpty() || !rangePartitions.isEmpty()) { + pb.setSplitRowsRangeBounds(new Operation.OperationsEncoder() + .encodeRangePartitions(rangePartitions, splitRows)); + } + } else { + // With the presence of a range with custom hash schema when the + // table-wide hash schema is used for a particular range, add proper + // element into PartitionSchemaPB::custom_hash_schema_ranges to satisfy + // the convention used by the backend. Do so for all the ranges with + // table-wide hash schemas. + for (RangePartition p : rangePartitions) { + org.apache.kudu.Common.PartitionSchemaPB.RangeWithHashSchemaPB.Builder b = + pb.getPartitionSchemaBuilder().addCustomHashSchemaRangesBuilder(); + // Set the hash schema for the range. 
+ for (org.apache.kudu.Common.PartitionSchemaPB.HashBucketSchemaPB hashSchema : + pb.getPartitionSchemaBuilder().getHashSchemaList()) { + b.addHashSchema(hashSchema); + } + b.setRangeBounds( + new Operation.OperationsEncoder().encodeLowerAndUpperBounds( + p.lowerBound, p.upperBound, p.lowerBoundType, p.upperBoundType)); + } + } + isPbGenerationDone = true; + return pb; + } + + List getRequiredFeatureFlags(Schema schema) { + List requiredFeatureFlags = new ArrayList<>(); + if (schema.hasAutoIncrementingColumn()) { + requiredFeatureFlags.add( + Integer.valueOf(Master.MasterFeatures.AUTO_INCREMENTING_COLUMN_VALUE)); + } + if (schema.hasImmutableColumns()) { + requiredFeatureFlags.add( + Integer.valueOf(Master.MasterFeatures.IMMUTABLE_COLUMN_ATTRIBUTE_VALUE)); + } + if (!rangePartitions.isEmpty() || !customRangePartitions.isEmpty()) { + requiredFeatureFlags.add(Integer.valueOf(Master.MasterFeatures.RANGE_PARTITION_BOUNDS_VALUE)); + } + if (!customRangePartitions.isEmpty()) { + requiredFeatureFlags.add( + Integer.valueOf(Master.MasterFeatures.RANGE_SPECIFIC_HASH_SCHEMA_VALUE)); + } + + return requiredFeatureFlags; + } + + boolean shouldWait() { + return wait; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableRequest.java new file mode 100644 index 0000000000..633bfad4d6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableRequest.java @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
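Taken together, the setters above compose into a single `CreateTableOptions` instance that is passed to `KuduClient.createTable()`. The following is a minimal sketch of typical usage, assuming a reachable cluster; the master address, table name, and column names are placeholders:

[source,java]
----
import java.util.Arrays;
import java.util.Collections;

import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.Type;
import org.apache.kudu.client.CreateTableOptions;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.PartialRow;

public class CreateTableExample {
  public static void main(String[] args) throws KuduException {
    // "kudu-master:7051" is a placeholder master address.
    try (KuduClient client = new KuduClient.KuduClientBuilder("kudu-master:7051").build()) {
      Schema schema = new Schema(Arrays.asList(
          new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build(),
          new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).nullable(true).build()));

      // Range-partition on "key" and pre-split the range at key = 1000.
      PartialRow split = schema.newPartialRow();
      split.addInt("key", 1000);

      CreateTableOptions options = new CreateTableOptions()
          .setRangePartitionColumns(Collections.singletonList("key"))
          .addSplitRow(split)
          .setNumReplicas(3)
          .setWait(true); // block until the table is fully created

      client.createTable("example_table", schema, options);
    }
  }
}
----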
+ +package org.apache.kudu.client; + +import java.util.Collection; +import java.util.EnumSet; +import java.util.List; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.Schema; +import org.apache.kudu.client.ProtobufHelper.SchemaPBConversionFlags; +import org.apache.kudu.master.Master; +import org.apache.kudu.util.Pair; + +/** + * RPC to create new tables + */ +@InterfaceAudience.Private +class CreateTableRequest extends KuduRpc { + + static final String CREATE_TABLE = "CreateTable"; + + private final Schema schema; + private final String name; + private final Master.CreateTableRequestPB.Builder builder; + private final List featureFlags; + + CreateTableRequest(KuduTable masterTable, + String name, + Schema schema, + CreateTableOptions cto, + Timer timer, + long timeoutMillis) { + super(masterTable, timer, timeoutMillis); + this.schema = schema; + this.name = name; + this.builder = cto.getBuilder(); + featureFlags = cto.getRequiredFeatureFlags(schema); + } + + @Override + Message createRequestPB() { + this.builder.setName(this.name); + this.builder.setSchema( + ProtobufHelper.schemaToPb(this.schema, + EnumSet.of(SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_ID))); + return this.builder.build(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return CREATE_TABLE; + } + + @Override + Pair deserialize(final CallResponse callResponse, + String tsUUID) throws KuduException { + final Master.CreateTableResponsePB.Builder builder = Master.CreateTableResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + CreateTableResponse response = + new CreateTableResponse( + timeoutTracker.getElapsedMillis(), + tsUUID, + builder.getTableId().toStringUtf8()); + return new Pair( + response, builder.hasError() ? builder.getError() : null); + } + + @Override + Collection getRequiredFeatures() { + return featureFlags; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableResponse.java new file mode 100644 index 0000000000..8cf41bc552 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/CreateTableResponse.java @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class CreateTableResponse extends KuduRpcResponse { + private final String tableId; + + /** + * @param elapsedMillis Time in milliseconds since RPC creation to now. 
+ */ + CreateTableResponse(long elapsedMillis, String tsUUID, String tableId) { + super(elapsedMillis, tsUUID); + this.tableId = tableId; + } + + /** + * @return the ID of the created table + */ + public String getTableId() { + return tableId; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Delete.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Delete.java new file mode 100644 index 0000000000..654f7b027c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Delete.java @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Class of Operation for whole row removals. + * Only columns which are part of the key can be set. + * Instances of this class should not be reused. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Delete extends Operation { + + Delete(KuduTable table) { + super(table); + } + + @Override + ChangeType getChangeType() { + return ChangeType.DELETE; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteIgnore.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteIgnore.java new file mode 100644 index 0000000000..10e3cd2c81 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteIgnore.java @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Class of Operation for whole row removals ignoring missing rows. + * Only columns which are part of the key can be set. + * Instances of this class should not be reused. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class DeleteIgnore extends Operation { + + DeleteIgnore(KuduTable table) { + super(table); + } + + @Override + ChangeType getChangeType() { + return ChangeType.DELETE_IGNORE; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteTableRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteTableRequest.java new file mode 100644 index 0000000000..5ecf13a701 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteTableRequest.java @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.master.Master; +import org.apache.kudu.util.Pair; + +/** + * RPC to delete tables + */ +@InterfaceAudience.Private +class DeleteTableRequest extends KuduRpc { + + static final String DELETE_TABLE = "DeleteTable"; + + private final String name; + + private int reserveSeconds = -1; + + DeleteTableRequest(KuduTable table, + String name, + Timer timer, + long timeoutMillis, + int reserveSeconds) { + super(table, timer, timeoutMillis); + this.name = name; + this.reserveSeconds = reserveSeconds; + } + + DeleteTableRequest(KuduTable table, + String name, + Timer timer, + long timeoutMillis) { + super(table, timer, timeoutMillis); + this.name = name; + } + + @Override + Message createRequestPB() { + final Master.DeleteTableRequestPB.Builder builder = Master.DeleteTableRequestPB.newBuilder(); + Master.TableIdentifierPB tableID = + Master.TableIdentifierPB.newBuilder().setTableName(name).build(); + builder.setTable(tableID); + if (reserveSeconds >= 0) { + builder.setReserveSeconds(reserveSeconds); + } + return builder.build(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return DELETE_TABLE; + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + final Master.DeleteTableResponsePB.Builder builder = Master.DeleteTableResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + DeleteTableResponse response = + new DeleteTableResponse(timeoutTracker.getElapsedMillis(), tsUUID); + return new Pair( + response, builder.hasError() ? 
builder.getError() : null); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteTableResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteTableResponse.java new file mode 100644 index 0000000000..403ef41380 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/DeleteTableResponse.java @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class DeleteTableResponse extends KuduRpcResponse { + + /** + * @param elapsedMillis Time in milliseconds since RPC creation to now. + */ + DeleteTableResponse(long elapsedMillis, String tsUUID) { + super(elapsedMillis, tsUUID); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ErrorCollector.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ErrorCollector.java new file mode 100644 index 0000000000..3a334f197a --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ErrorCollector.java @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayDeque; +import java.util.Queue; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Class that helps tracking row errors. All methods are thread-safe. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class ErrorCollector { + private final Queue errorQueue; + private int maxCapacity; + private boolean overflowed; + + /** + * Create a new error collector with a maximum capacity. 
+ * @param maxCapacity how many errors can be stored, has to be higher than 0 + */ + public ErrorCollector(int maxCapacity) { + Preconditions.checkArgument(maxCapacity > 0, "Need to be able to store at least one row error"); + this.maxCapacity = maxCapacity; + this.errorQueue = new ArrayDeque<>(maxCapacity); + } + + /** + * Add a new error to this collector. If it is already at max capacity, the oldest error will be + * discarded before the new one is added. + * @param rowError a row error to collect + */ + public synchronized void addError(RowError rowError) { + if (errorQueue.size() >= maxCapacity) { + errorQueue.poll(); + overflowed = true; + } + errorQueue.add(rowError); + } + + /** + * Get the current count of collected row errors. Cannot be greater than the max capacity this + * instance was configured with. + * @return the count of errors + */ + public synchronized int countErrors() { + return errorQueue.size(); + } + + /** + * Get all the errors that have been collected and an indication if the list overflowed. + * The list of errors cleared and the overflow state is reset. + * @return an object that contains both the list of row errors and the overflow status + */ + public synchronized RowErrorsAndOverflowStatus getErrors() { + RowError[] returnedErrors = new RowError[errorQueue.size()]; + errorQueue.toArray(returnedErrors); + errorQueue.clear(); + + RowErrorsAndOverflowStatus returnObject = + new RowErrorsAndOverflowStatus(returnedErrors, overflowed); + overflowed = false; + return returnObject; + } + + /** + * Resize ErrorCollector. If size < errorQueue.size(), + * the oldest errors will be discarded and overflowed will be set; + */ + public synchronized void resize(int size) { + Preconditions.checkArgument(size > 0, "Need to be able to store at least one row error"); + if (size == maxCapacity) { + return; + } + + if (size < maxCapacity) { + int trimmedErrors = errorQueue.size() - size; + if (trimmedErrors > 0) { + overflowed = true; + } else { + trimmedErrors = 0; + } + + for (int i = 0; i < trimmedErrors; ++i) { + errorQueue.poll(); + } + } + + maxCapacity = size; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ExternalConsistencyMode.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ExternalConsistencyMode.java new file mode 100644 index 0000000000..22ccd95d12 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ExternalConsistencyMode.java @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
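The `ErrorCollector` above backs a session's pending-error buffer in background-flush mode. A minimal sketch of how an application typically drains it through the public `KuduSession` API; the master address, table name, and column name are placeholders:

[source,java]
----
import org.apache.kudu.client.Insert;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.RowError;
import org.apache.kudu.client.RowErrorsAndOverflowStatus;
import org.apache.kudu.client.SessionConfiguration;

public class PendingErrorsExample {
  public static void main(String[] args) throws KuduException {
    try (KuduClient client = new KuduClient.KuduClientBuilder("kudu-master:7051").build()) {
      KuduTable table = client.openTable("example_table");
      KuduSession session = client.newSession();

      // In background-flush mode, per-row failures are reported through the
      // session's error collector instead of being thrown from apply().
      session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND);
      session.setErrorCollectorSpace(100); // resizes the underlying collector

      Insert insert = table.newInsert();
      insert.getRow().addInt("key", 1);
      session.apply(insert);
      session.flush();

      // Draining the collector returns the buffered errors, clears them, and
      // resets the overflow flag.
      RowErrorsAndOverflowStatus pending = session.getPendingErrors();
      if (pending.isOverflowed()) {
        System.err.println("some row errors were discarded");
      }
      for (RowError error : pending.getRowErrors()) {
        System.err.println(error);
      }
      session.close();
    }
  }
}
----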
+ +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Common; + +/** + * The possible external consistency modes on which Kudu operates. + * See {@code src/kudu/common/common.proto} for a detailed explanations on the + * meaning and implications of each mode. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum ExternalConsistencyMode { + CLIENT_PROPAGATED(Common.ExternalConsistencyMode.CLIENT_PROPAGATED), + COMMIT_WAIT(Common.ExternalConsistencyMode.COMMIT_WAIT); + + private final Common.ExternalConsistencyMode pbVersion; + ExternalConsistencyMode(Common.ExternalConsistencyMode pbVersion) { + this.pbVersion = pbVersion; + } + + @InterfaceAudience.Private + public Common.ExternalConsistencyMode pbVersion() { + return pbVersion; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/FaultTolerantScannerExpiredException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/FaultTolerantScannerExpiredException.java new file mode 100644 index 0000000000..8216b41dc6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/FaultTolerantScannerExpiredException.java @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * A scanner expired exception only used for fault tolerant scanner. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +@SuppressWarnings("serial") +class FaultTolerantScannerExpiredException extends RecoverableException { + /** + * Constructor. + * @param status status object containing the reason for the exception + * trace + */ + FaultTolerantScannerExpiredException(Status status) { + super(status); + } + + /** + * Constructor. + * @param status status object containing the reason for the exception + * @param cause the exception that caused this one to be thrown + */ + FaultTolerantScannerExpiredException(Status status, Throwable cause) { + super(status, cause); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableLocationsRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableLocationsRequest.java new file mode 100644 index 0000000000..7542e106c1 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableLocationsRequest.java @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Message; +import com.google.protobuf.UnsafeByteOperations; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.master.Master; +import org.apache.kudu.util.Pair; + +/** + * Package-private RPC that can only go to a master. + */ +@InterfaceAudience.Private +class GetTableLocationsRequest extends KuduRpc { + + private final byte[] startPartitionKey; + private final byte[] endKey; + private final String tableId; + private final int maxReturnedLocations; + + GetTableLocationsRequest(KuduTable table, + byte[] startPartitionKey, + byte[] endPartitionKey, + String tableId, + int maxReturnedLocations, + Timer timer, + long timeoutMillis) { + super(table, timer, timeoutMillis); + if (startPartitionKey != null && endPartitionKey != null && + Bytes.memcmp(startPartitionKey, endPartitionKey) > 0) { + throw new IllegalArgumentException( + "The start partition key must be smaller or equal to the end partition key"); + } + this.startPartitionKey = startPartitionKey; + this.endKey = endPartitionKey; + this.tableId = tableId; + this.maxReturnedLocations = maxReturnedLocations; + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return "GetTableLocations"; + } + + @Override + Pair deserialize( + final CallResponse callResponse, String tsUUID) + throws KuduException { + Master.GetTableLocationsResponsePB.Builder builder = Master.GetTableLocationsResponsePB + .newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + Master.GetTableLocationsResponsePB resp = builder.build(); + return new Pair( + resp, builder.hasError() ? 
builder.getError() : null); + } + + @Override + Message createRequestPB() { + final Master.GetTableLocationsRequestPB.Builder builder = Master + .GetTableLocationsRequestPB.newBuilder(); + builder.setTable(Master.TableIdentifierPB.newBuilder() + .setTableId(ByteString.copyFromUtf8(tableId))); + if (startPartitionKey != null) { + builder.setPartitionKeyStart(UnsafeByteOperations.unsafeWrap(startPartitionKey)); + } + if (endKey != null) { + builder.setPartitionKeyEnd(UnsafeByteOperations.unsafeWrap(endKey)); + } + builder.setMaxReturnedLocations(maxReturnedLocations); + builder.setInternTsInfosInResponse(true); + return builder.build(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableSchemaRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableSchemaRequest.java new file mode 100644 index 0000000000..91eac64157 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableSchemaRequest.java @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.master.Master.GetTableSchemaRequestPB; +import static org.apache.kudu.master.Master.GetTableSchemaResponsePB; +import static org.apache.kudu.master.Master.TableIdentifierPB; + +import java.util.Collection; +import java.util.List; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.ByteString; +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.Schema; +import org.apache.kudu.master.Master; +import org.apache.kudu.util.Pair; + +/** + * RPC to fetch a table's schema + */ +@InterfaceAudience.Private +public class GetTableSchemaRequest extends KuduRpc { + private final String id; + private final String name; + private final List requiredFeatures; + + GetTableSchemaRequest(KuduTable masterTable, + String id, + String name, + Timer timer, + long timeoutMillis, + boolean requiresAuthzTokenSupport) { + super(masterTable, timer, timeoutMillis); + Preconditions.checkArgument(id != null ^ name != null, + "Only one of table ID or the table name should be provided"); + this.id = id; + this.name = name; + this.requiredFeatures = requiresAuthzTokenSupport ? 
+ ImmutableList.of(Master.MasterFeatures.GENERATE_AUTHZ_TOKEN_VALUE) : + ImmutableList.of(); + } + + @Override + Message createRequestPB() { + final GetTableSchemaRequestPB.Builder builder = + GetTableSchemaRequestPB.newBuilder(); + TableIdentifierPB.Builder identifierBuilder = TableIdentifierPB.newBuilder(); + if (id != null) { + identifierBuilder.setTableId(ByteString.copyFromUtf8(id)); + } else { + Preconditions.checkNotNull(name); + identifierBuilder.setTableName(name); + } + builder.setTable(identifierBuilder.build()); + return builder.build(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return "GetTableSchema"; + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + final GetTableSchemaResponsePB.Builder respBuilder = GetTableSchemaResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + Schema schema = ProtobufHelper.pbToSchema(respBuilder.getSchema()); + GetTableSchemaResponse response = new GetTableSchemaResponse( + timeoutTracker.getElapsedMillis(), + tsUUID, + schema, + respBuilder.getTableId().toStringUtf8(), + respBuilder.getTableName(), + respBuilder.getNumReplicas(), + ProtobufHelper.pbToPartitionSchema(respBuilder.getPartitionSchema(), schema), + respBuilder.hasAuthzToken() ? respBuilder.getAuthzToken() : null, + respBuilder.getExtraConfigsMap(), + respBuilder.hasOwner() ? respBuilder.getOwner() : "", + respBuilder.hasComment() ? respBuilder.getComment() : ""); + return new Pair( + response, respBuilder.hasError() ? respBuilder.getError() : null); + } + + @Override + Collection getRequiredFeatures() { + return requiredFeatures; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableSchemaResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableSchemaResponse.java new file mode 100644 index 0000000000..dc728763b8 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableSchemaResponse.java @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
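`GetTableSchemaRequest` and `GetTableSchemaResponse` are private plumbing; applications normally reach the same information through `KuduClient.openTable()` and the returned `KuduTable` handle. A minimal sketch, with the master address and table name as placeholders:

[source,java]
----
import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduTable;

public class DescribeTableExample {
  public static void main(String[] args) throws KuduException {
    try (KuduClient client = new KuduClient.KuduClientBuilder("kudu-master:7051").build()) {
      // openTable() fetches the table's schema from the master and caches it
      // on the returned KuduTable handle.
      KuduTable table = client.openTable("example_table");

      System.out.println("table id: " + table.getTableId());
      System.out.println("replicas: " + table.getNumReplicas());

      Schema schema = table.getSchema();
      for (ColumnSchema column : schema.getColumns()) {
        System.out.println(column.getName() + " " + column.getType().getName());
      }
    }
  }
}
----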
+ +package org.apache.kudu.client; + +import java.util.Map; + +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.Schema; +import org.apache.kudu.security.Token.SignedTokenPB; + +@InterfaceAudience.Private +public class GetTableSchemaResponse extends KuduRpcResponse { + + private final Schema schema; + private final PartitionSchema partitionSchema; + private final String tableId; + private final String tableName; + private final int numReplicas; + private final SignedTokenPB authzToken; + private final Map extraConfig; + private final String owner; + private final String comment; + + /** + * @param elapsedMillis Time in milliseconds since RPC creation to now + * @param tsUUID the UUID of the tablet server that sent the response + * @param schema the table's schema + * @param tableId the UUID of the table in the response + * @param tableName the name of the table in the response + * @param numReplicas the table's replication factor + * @param partitionSchema the table's partition schema + * @param authzToken an authorization token for use with this table + * @param extraConfig the table's extra configuration properties + * @param owner the table's owner + * @param owner the table's comment + */ + GetTableSchemaResponse(long elapsedMillis, + String tsUUID, + Schema schema, + String tableId, + String tableName, + int numReplicas, + PartitionSchema partitionSchema, + SignedTokenPB authzToken, + Map extraConfig, + String owner, + String comment) { + super(elapsedMillis, tsUUID); + this.schema = schema; + this.partitionSchema = partitionSchema; + this.tableId = tableId; + this.tableName = tableName; + this.numReplicas = numReplicas; + this.authzToken = authzToken; + this.extraConfig = extraConfig; + this.owner = owner; + this.comment = comment; + } + + /** + * Get the table's schema. + * @return Table's schema + */ + public Schema getSchema() { + return schema; + } + + /** + * Get the table's partition schema. + * @return the table's partition schema + */ + public PartitionSchema getPartitionSchema() { + return partitionSchema; + } + + /** + * Get the table's unique identifier. + * @return the table's tableId + */ + public String getTableId() { + return tableId; + } + + /** + * Get the table's name. + * @return the table's name + */ + public String getTableName() { + return tableName; + } + + /** + * Get the table's replication factor. + * @return the table's replication factor + */ + public int getNumReplicas() { + return numReplicas; + } + + /** + * Get the authorization token for the table. + * @return the table's authz token + */ + public SignedTokenPB getAuthzToken() { + return authzToken; + } + + /** + * Get the table's extra configuration properties. + * @return the table's extra configuration properties + */ + public Map getExtraConfig() { + return extraConfig; + } + + /** + * Get the owner for the table. + * @return the table's owner + */ + public String getOwner() { + return owner; + } + + /** + * Get the comment for the table. 
+ * @return the table's comment + */ + public String getComment() { + return comment; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableStatisticsRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableStatisticsRequest.java new file mode 100644 index 0000000000..6a0baf4a1f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableStatisticsRequest.java @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.master.Master; +import org.apache.kudu.util.Pair; + +@InterfaceAudience.Private +class GetTableStatisticsRequest extends KuduRpc { + + static final String GET_TABLE_STATISTICS = "GetTableStatistics"; + + private final String name; + + GetTableStatisticsRequest(KuduTable table, + String name, + Timer timer, + long timeoutMillis) { + super(table, timer, timeoutMillis); + this.name = name; + } + + @Override + Message createRequestPB() { + final Master.GetTableStatisticsRequestPB.Builder builder = + Master.GetTableStatisticsRequestPB.newBuilder(); + Master.TableIdentifierPB tableID = + Master.TableIdentifierPB.newBuilder().setTableName(name).build(); + builder.setTable(tableID); + return builder.build(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return GET_TABLE_STATISTICS; + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + final Master.GetTableStatisticsResponsePB.Builder respBuilder = + Master.GetTableStatisticsResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + GetTableStatisticsResponse response = new GetTableStatisticsResponse( + timeoutTracker.getElapsedMillis(), + tsUUID, + respBuilder.getOnDiskSize(), + respBuilder.getLiveRowCount()); + return new Pair( + response, respBuilder.hasError() ? respBuilder.getError() : null); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableStatisticsResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableStatisticsResponse.java new file mode 100644 index 0000000000..92f46f5a7e --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTableStatisticsResponse.java @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +class GetTableStatisticsResponse extends KuduRpcResponse { + + private final long onDiskSize; + private final long liveRowCount; + + + /** + * @param elapsedMillis Time in milliseconds since RPC creation to now + * @param tsUUID the UUID of the tablet server that sent the response + * @param onDiskSize the table's on disk size + * @param liveRowCount the table's live row count + */ + GetTableStatisticsResponse(long elapsedMillis, + String tsUUID, + long onDiskSize, + long liveRowCount) { + super(elapsedMillis, tsUUID); + this.onDiskSize = onDiskSize; + this.liveRowCount = liveRowCount; + } + + /** + * Get the table's on disk size, this statistic is pre-replication. + * @return Table's on disk size + */ + public long getOnDiskSize() { + return onDiskSize; + } + + /** + * Get the table's live row count, this statistic is pre-replication. + * @return Table's live row count + */ + public long getLiveRowCount() { + return liveRowCount; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTransactionStateRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTransactionStateRequest.java new file mode 100644 index 0000000000..702806af51 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTransactionStateRequest.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.transactions.TxnManager.GetTransactionStateResponsePB; + +import java.util.Collection; +import java.util.List; +import java.util.OptionalLong; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.transactions.TxnManager; +import org.apache.kudu.util.Pair; + +/** + * A wrapper class for kudu.transactions.TxnManagerService.GetTransactionState RPC. 
+ */ +@InterfaceAudience.Private +class GetTransactionStateRequest extends KuduRpc { + private static final List featureFlags = ImmutableList.of(); + final long txnId; + + GetTransactionStateRequest( + KuduTable masterTable, Timer timer, long timeoutMillis, long txnId) { + super(masterTable, timer, timeoutMillis); + Preconditions.checkArgument(txnId > AsyncKuduClient.INVALID_TXN_ID); + this.txnId = txnId; + } + + @Override + Message createRequestPB() { + final TxnManager.GetTransactionStateRequestPB.Builder b = + TxnManager.GetTransactionStateRequestPB.newBuilder(); + b.setTxnId(txnId); + return b.build(); + } + + @Override + String serviceName() { + return TXN_MANAGER_SERVICE_NAME; + } + + @Override + String method() { + return "GetTransactionState"; + } + + @Override + Pair deserialize( + final CallResponse callResponse, String serverUUID) throws KuduException { + final GetTransactionStateResponsePB.Builder b = + GetTransactionStateResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), b); + if (!b.hasError()) { + Preconditions.checkState(b.hasState()); + } + OptionalLong ts = b.hasCommitTimestamp() ? OptionalLong.of(b.getCommitTimestamp()) + : OptionalLong.empty(); + GetTransactionStateResponse response = new GetTransactionStateResponse( + timeoutTracker.getElapsedMillis(), serverUUID, b.getState(), ts); + return new Pair<>(response, b.hasError() ? b.getError() : null); + } + + @Override + Collection getRequiredFeatures() { + return featureFlags; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTransactionStateResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTransactionStateResponse.java new file mode 100644 index 0000000000..d4102415ee --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/GetTransactionStateResponse.java @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
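`GetTransactionState` is the RPC the client polls while waiting for a transaction to finalize. A minimal sketch of the public transaction flow that exercises it, assuming a cluster with multi-row transaction support enabled; the master address, table name, and column name are placeholders:

[source,java]
----
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.KuduTransaction;
import org.apache.kudu.client.Upsert;

public class TransactionExample {
  public static void main(String[] args) throws KuduException {
    try (KuduClient client = new KuduClient.KuduClientBuilder("kudu-master:7051").build()) {
      KuduTable table = client.openTable("example_table");

      KuduTransaction txn = client.newTransaction();
      KuduSession session = txn.newKuduSession();

      Upsert upsert = table.newUpsert();
      upsert.getRow().addInt("key", 1);
      session.apply(upsert);
      session.flush();

      // commit() returns once the transaction has been finalized; while it
      // waits, the client polls the transaction status with RPCs such as
      // GetTransactionState.
      txn.commit();
    }
  }
}
----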
+ +package org.apache.kudu.client; + +import java.util.OptionalLong; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.transactions.Transactions; + +@InterfaceAudience.Private +public class GetTransactionStateResponse extends KuduRpcResponse { + private final Transactions.TxnStatePB txnState; + private final OptionalLong txnCommitTimestamp; + + /** + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param serverUUID UUID of the server that sent the response + * @param txnState the state of the transaction + */ + GetTransactionStateResponse( + long elapsedMillis, + String serverUUID, + Transactions.TxnStatePB txnState, + OptionalLong txnCommitTimestamp) { + super(elapsedMillis, serverUUID); + this.txnState = txnState; + this.txnCommitTimestamp = txnCommitTimestamp; + } + + public Transactions.TxnStatePB txnState() { + return txnState; + } + + boolean hasCommitTimestamp() { + return txnCommitTimestamp.isPresent(); + } + + long getCommitTimestamp() { + Preconditions.checkState(hasCommitTimestamp()); + return txnCommitTimestamp.getAsLong(); + } + + public boolean isCommitted() { + return txnState == Transactions.TxnStatePB.COMMITTED; + } + + public boolean isAborted() { + return txnState == Transactions.TxnStatePB.ABORTED; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HasFailedRpcException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HasFailedRpcException.java new file mode 100644 index 0000000000..e05e2f2b91 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HasFailedRpcException.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Interface implemented by {@link KuduException}s that can tell you which + * RPC failed. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface HasFailedRpcException { + + /** + * Returns the RPC that caused this exception. + */ + KuduRpc getFailedRpc(); + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HiveMetastoreConfig.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HiveMetastoreConfig.java new file mode 100644 index 0000000000..946655f6d2 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HiveMetastoreConfig.java @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import javax.annotation.CheckForNull; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * The Hive Metastore configuration of a Kudu cluster. + */ +@InterfaceAudience.LimitedPrivate("Impala") +@InterfaceStability.Unstable +public class HiveMetastoreConfig { + private final String hiveMetastoreUris; + private final boolean hiveMetastoreSaslEnabled; + private final String hiveMetastoreUuid; + + HiveMetastoreConfig(String hiveMetastoreUris, + boolean hiveMetastoreSaslEnabled, + String hiveMetastoreUuid) { + this.hiveMetastoreUris = hiveMetastoreUris; + this.hiveMetastoreSaslEnabled = hiveMetastoreSaslEnabled; + this.hiveMetastoreUuid = hiveMetastoreUuid; + } + + public String getHiveMetastoreUris() { + return hiveMetastoreUris; + } + + public boolean getHiveMetastoreSaslEnabled() { + return hiveMetastoreSaslEnabled; + } + + @CheckForNull + public String getHiveMetastoreUuid() { + return hiveMetastoreUuid; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HostAndPort.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HostAndPort.java new file mode 100644 index 0000000000..1aae477ef9 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/HostAndPort.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.net.InetSocketAddress; + +import com.google.common.base.Objects; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * A simple wrapper around InetSocketAddress to prevent + * accidentally introducing DNS lookups. + * + * The HostAndPort implementation in Guava is not used + * because Guava is shaded and relocated in Kudu preventing + * it from being used as a parameter or return value on + * public methods. Additionally Guava's HostAndPort + * implementation is marked as beta. + */ +@InterfaceAudience.Private +public class HostAndPort { + + private final InetSocketAddress address; + + public HostAndPort(String host, int port) { + // Using createUnresolved ensures no lookups will occur. + this.address = InetSocketAddress.createUnresolved(host, port); + } + + public String getHost() { + // Use getHostString to ensure no reverse lookup is done. + return address.getHostString(); + } + + public int getPort() { + return address.getPort(); + } + + public InetSocketAddress getAddress() { + return address; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof HostAndPort)) { + return false; + } + HostAndPort that = (HostAndPort) o; + return Objects.equal(address, that.address); + } + + @Override + public int hashCode() { + return Objects.hashCode(address); + } + + @Override + public String toString() { + return address.getHostName() + ":" + address.getPort(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IPCUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IPCUtil.java new file mode 100644 index 0000000000..16ece95298 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IPCUtil.java @@ -0,0 +1,92 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kudu.client; + +import java.io.IOException; +import java.io.OutputStream; + +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.Message; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Helper methods for RPCs. + */ +@InterfaceAudience.Private +public class IPCUtil { + + private IPCUtil() { + } + + /** + * Write out header, param, and cell block if there is one. 
+ * @param dos + * @param header + * @param param + * @return Total number of bytes written. + * @throws java.io.IOException + */ + public static int write(final OutputStream dos, final Message header, final Message param) + throws IOException { + // Must calculate total size and write that first so other side can read it all in in one + // swoop. This is dictated by how the server is currently written. Server needs to change + // if we are to be able to write without the length prefixing. + int totalSize = IPCUtil.getTotalSizeWhenWrittenDelimited(header, param); + return write(dos, header, param, totalSize); + } + + private static int write(final OutputStream dos, final Message header, final Message param, + final int totalSize) + throws IOException { + // I confirmed toBytes does same as say DataOutputStream#writeInt. + dos.write(toBytes(totalSize)); + header.writeDelimitedTo(dos); + if (param != null) { + param.writeDelimitedTo(dos); + } + dos.flush(); + return totalSize; + } + + /** + * @return Size on the wire when the two messages are written with writeDelimitedTo + */ + public static int getTotalSizeWhenWrittenDelimited(Message ... messages) { + int totalSize = 0; + for (Message m: messages) { + if (m == null) { + continue; + } + totalSize += m.getSerializedSize(); + totalSize += CodedOutputStream.computeUInt32SizeNoTag(m.getSerializedSize()); + } + return totalSize; + } + + public static byte[] toBytes(int val) { + byte [] b = new byte[4]; + for (int i = 3; i > 0; i--) { + b[i] = (byte) val; + val >>>= 8; + } + b[0] = (byte) val; + return b; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Insert.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Insert.java new file mode 100644 index 0000000000..6ac7fc9dec --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Insert.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Represents a single row insert. + * Instances of this class should not be reused. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Insert extends Operation { + + Insert(KuduTable table) { + super(table); + } + + @Override + ChangeType getChangeType() { + return ChangeType.INSERT; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InsertIgnore.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InsertIgnore.java new file mode 100644 index 0000000000..507d22db4a --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InsertIgnore.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Represents a single row insert ignoring duplicate rows. + * Instances of this class should not be reused. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class InsertIgnore extends Operation { + + InsertIgnore(KuduTable table) { + super(table); + } + + @Override + ChangeType getChangeType() { + return ChangeType.INSERT_IGNORE; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InvalidAuthnTokenException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InvalidAuthnTokenException.java new file mode 100644 index 0000000000..2f88414b40 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InvalidAuthnTokenException.java @@ -0,0 +1,39 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Exception for notifying on invalid authn token. In most use cases in the Kudu Java client code, + * 'invalid authn token' means 'expired authn token'. 
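A usage sketch of the two operation types above, going through the synchronous client API as found in upstream Kudu (table factory methods newInsert()/newInsertIgnore()); the master address, table name, and column names are placeholders:

[source,java]
----
import org.apache.kudu.client.Insert;
import org.apache.kudu.client.InsertIgnore;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.OperationResponse;

public class InsertIgnoreSketch {
  public static void main(String[] args) throws KuduException {
    try (KuduClient client =
             new KuduClient.KuduClientBuilder("master.example.com:7051").build()) {
      KuduTable table = client.openTable("metrics");
      KuduSession session = client.newSession();

      // A plain Insert reports a row error if the primary key already exists.
      Insert insert = table.newInsert();
      insert.getRow().addLong("key", 42L);
      insert.getRow().addString("value", "first");
      OperationResponse first = session.apply(insert);
      if (first.hasRowError()) {
        System.err.println(first.getRowError());
      }

      // An InsertIgnore skips rows whose primary key already exists instead of failing.
      InsertIgnore insertIgnore = table.newInsertIgnore();
      insertIgnore.getRow().addLong("key", 42L);
      insertIgnore.getRow().addString("value", "second");
      session.apply(insertIgnore);

      session.close();
    }
  }
}
----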
Receiving this exception means the current + * authentication token is no longer valid and a new one is needed to establish connections to + * the Kudu servers for sending RPCs. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +class InvalidAuthnTokenException extends RecoverableException { + /** + * @param status status object containing the reason for the exception trace + */ + InvalidAuthnTokenException(Status status) { + super(status); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InvalidAuthzTokenException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InvalidAuthzTokenException.java new file mode 100644 index 0000000000..4adb82f995 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/InvalidAuthzTokenException.java @@ -0,0 +1,40 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Exception for notifying of an invalid authorization token. In most use cases + * in the Kudu Java client code, 'invalid authz token' means 'expired authz + * token'. Receiving this exception means the authorization token used to make a + * request is no longer valid and a new one is needed to make requests that + * access data. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +class InvalidAuthzTokenException extends RecoverableException { + /** + * @param status status object containing the reason for the exception trace + */ + InvalidAuthzTokenException(Status status) { + super(status); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsAlterTableDoneRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsAlterTableDoneRequest.java new file mode 100644 index 0000000000..3a8a3215e2 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsAlterTableDoneRequest.java @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.master.Master.IsAlterTableDoneRequestPB; +import static org.apache.kudu.master.Master.IsAlterTableDoneResponsePB; +import static org.apache.kudu.master.Master.TableIdentifierPB; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.util.Pair; + +/** + * RPC used to check if an alter is running for the specified table + */ +@InterfaceAudience.Private +class IsAlterTableDoneRequest extends KuduRpc { + private final TableIdentifierPB.Builder tableId; + + IsAlterTableDoneRequest(KuduTable masterTable, + TableIdentifierPB.Builder tableId, + Timer timer, + long timeoutMillis) { + super(masterTable, timer, timeoutMillis); + this.tableId = tableId; + } + + @Override + Message createRequestPB() { + final IsAlterTableDoneRequestPB.Builder builder = + IsAlterTableDoneRequestPB.newBuilder(); + builder.setTable(tableId); + return builder.build(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return "IsAlterTableDone"; + } + + @Override + Pair deserialize(final CallResponse callResponse, + String tsUUID) throws KuduException { + final IsAlterTableDoneResponsePB.Builder respBuilder = IsAlterTableDoneResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + IsAlterTableDoneResponse resp = new IsAlterTableDoneResponse(timeoutTracker.getElapsedMillis(), + tsUUID, + respBuilder.getDone()); + return new Pair( + resp, respBuilder.hasError() ? respBuilder.getError() : null); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsAlterTableDoneResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsAlterTableDoneResponse.java new file mode 100644 index 0000000000..d1fac034bb --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsAlterTableDoneResponse.java @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Response to a isAlterTableDone command to use to know if an alter table is currently running on + * the specified table. 
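The polling pattern this RPC supports is exposed through the synchronous client; a minimal sketch, assuming a table named metrics and a placeholder master address:

[source,java]
----
import org.apache.kudu.Type;
import org.apache.kudu.client.AlterTableOptions;
import org.apache.kudu.client.KuduClient;

public class AlterTablePollSketch {
  public static void main(String[] args) throws Exception {
    try (KuduClient client =
             new KuduClient.KuduClientBuilder("master.example.com:7051").build()) {
      // Add a nullable column, then poll until the alteration has been applied
      // to every tablet.
      client.alterTable("metrics",
          new AlterTableOptions().addNullableColumn("note", Type.STRING));
      while (!client.isAlterTableDone("metrics")) {
        Thread.sleep(250);
      }
    }
  }
}
----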
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class IsAlterTableDoneResponse extends KuduRpcResponse { + + private final boolean done; + + IsAlterTableDoneResponse(long elapsedMillis, String tsUUID, boolean done) { + super(elapsedMillis, tsUUID); + this.done = done; + } + + /** + * Tells if the table is done being altered or not. + * @return whether the table alter is done + */ + public boolean isDone() { + return done; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsCreateTableDoneRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsCreateTableDoneRequest.java new file mode 100644 index 0000000000..2eb0d7c542 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsCreateTableDoneRequest.java @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.master.Master.IsCreateTableDoneRequestPB; +import org.apache.kudu.master.Master.IsCreateTableDoneResponsePB; +import org.apache.kudu.master.Master.TableIdentifierPB; +import org.apache.kudu.util.Pair; + +/** + * Package-private RPC that can only go to a master. + */ +@InterfaceAudience.Private +class IsCreateTableDoneRequest extends KuduRpc { + private final TableIdentifierPB.Builder tableId; + + IsCreateTableDoneRequest(KuduTable masterTable, + TableIdentifierPB.Builder tableId, + Timer timer, + long timeoutMillis) { + super(masterTable, timer, timeoutMillis); + this.tableId = tableId; + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return "IsCreateTableDone"; + } + + @Override + Pair deserialize( + final CallResponse callResponse, String tsUUID) throws KuduException { + IsCreateTableDoneResponsePB.Builder builder = + IsCreateTableDoneResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + IsCreateTableDoneResponse resp = + new IsCreateTableDoneResponse(timeoutTracker.getElapsedMillis(), + tsUUID, + builder.getDone()); + return new Pair( + resp, builder.hasError() ? 
builder.getError() : null); + } + + @Override + Message createRequestPB() { + final IsCreateTableDoneRequestPB.Builder builder = + IsCreateTableDoneRequestPB.newBuilder(); + builder.setTable(tableId); + return builder.build(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsCreateTableDoneResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsCreateTableDoneResponse.java new file mode 100644 index 0000000000..b306ed9741 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/IsCreateTableDoneResponse.java @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Response to an isCreateTableDone command. Describes whether the table is + * still being created. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class IsCreateTableDoneResponse extends KuduRpcResponse { + + private final boolean done; + + IsCreateTableDoneResponse(long elapsedMillis, String tsUUID, boolean done) { + super(elapsedMillis, tsUUID); + this.done = done; + } + + /** + * Returns whether the table is done being created. + * @return whether table creation is finished + */ + public boolean isDone() { + return done; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeepTransactionAliveRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeepTransactionAliveRequest.java new file mode 100644 index 0000000000..b446b2a7e1 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeepTransactionAliveRequest.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.transactions.TxnManager.KeepTransactionAliveResponsePB; + +import java.util.Collection; +import java.util.List; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.transactions.TxnManager; +import org.apache.kudu.util.Pair; + +/** + * A wrapper class for kudu.transactions.TxnManagerService.CoordinateTransaction RPC. + */ +@InterfaceAudience.Private +class KeepTransactionAliveRequest extends KuduRpc { + private static final List featureFlags = ImmutableList.of(); + private final long txnId; + + KeepTransactionAliveRequest(KuduTable masterTable, + Timer timer, + long timeoutMillis, + long txnId) { + super(masterTable, timer, timeoutMillis); + Preconditions.checkArgument(txnId > AsyncKuduClient.INVALID_TXN_ID); + this.txnId = txnId; + } + + @Override + Message createRequestPB() { + final TxnManager.KeepTransactionAliveRequestPB.Builder b = + TxnManager.KeepTransactionAliveRequestPB.newBuilder(); + b.setTxnId(txnId); + return b.build(); + } + + @Override + String serviceName() { + return TXN_MANAGER_SERVICE_NAME; + } + + @Override + String method() { + return "KeepTransactionAlive"; + } + + @Override + Pair deserialize( + final CallResponse callResponse, String serverUUID) throws KuduException { + final KeepTransactionAliveResponsePB.Builder b = + KeepTransactionAliveResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), b); + KeepTransactionAliveResponse response = new KeepTransactionAliveResponse( + timeoutTracker.getElapsedMillis(), serverUUID); + return new Pair<>(response, b.hasError() ? b.getError() : null); + } + + @Override + Collection getRequiredFeatures() { + return featureFlags; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeepTransactionAliveResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeepTransactionAliveResponse.java new file mode 100644 index 0000000000..333ba9780d --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeepTransactionAliveResponse.java @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
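This keep-alive RPC is driven by the transaction handle rather than by user code; a sketch of the corresponding public API, assuming the KuduTransaction handle exposes newKuduSession(), commit() and rollback() as in upstream Kudu, with placeholder table and column names:

[source,java]
----
import org.apache.kudu.client.Insert;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.KuduTransaction;

public class TransactionSketch {
  public static void main(String[] args) throws KuduException {
    try (KuduClient client =
             new KuduClient.KuduClientBuilder("master.example.com:7051").build()) {
      KuduTransaction txn = client.newTransaction();
      try {
        // While the handle is open, the client is expected to heartbeat the
        // transaction in the background using KeepTransactionAlive requests.
        KuduSession session = txn.newKuduSession();
        KuduTable table = client.openTable("metrics");
        Insert insert = table.newInsert();
        insert.getRow().addLong("key", 42L);
        insert.getRow().addString("value", "transactional write");
        session.apply(insert);
        session.close();
        txn.commit();
      } catch (KuduException e) {
        txn.rollback();
        throw e;
      }
    }
  }
}
----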
+ +package org.apache.kudu.client; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class KeepTransactionAliveResponse extends KuduRpcResponse { + /** + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param serverUUID UUID of the server that sent the response + */ + KeepTransactionAliveResponse(long elapsedMillis, String serverUUID) { + super(elapsedMillis, serverUUID); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeyEncoder.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeyEncoder.java new file mode 100644 index 0000000000..638df2bf36 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeyEncoder.java @@ -0,0 +1,506 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import com.google.common.primitives.Ints; +import com.google.common.primitives.UnsignedLongs; +import com.sangupta.murmur.Murmur2; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.PartitionSchema.HashBucketSchema; +import org.apache.kudu.util.ByteVec; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.DecimalUtil; +import org.apache.kudu.util.Pair; + +/** + * Utility class for encoding rows into primary and partition keys. + */ +@InterfaceAudience.Private +class KeyEncoder { + + private static final BigInteger MIN_VALUE_128 = BigInteger.valueOf(-2).pow(127); + + /** Non-constructable utility class. */ + private KeyEncoder() { + } + + /** + * Encodes the primary key of the row. + * + * @param row the row to encode + * @return the encoded primary key of the row + */ + public static byte[] encodePrimaryKey(final PartialRow row) { + ByteVec buf = ByteVec.create(); + final Schema schema = row.getSchema(); + for (int columnIdx = 0; columnIdx < schema.getPrimaryKeyColumnCount(); columnIdx++) { + final boolean isLast = columnIdx + 1 == schema.getPrimaryKeyColumnCount(); + encodeColumn(row, columnIdx, isLast, buf); + } + return buf.toArray(); + } + + /** + * Returns the bucket of the row for the given hash bucket schema. All columns + * in the hash bucket schema must be set in the row. 
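The bucket assignment underneath getHashBucket() reduces a 64-bit Murmur2 hash with an unsigned remainder; a JDK-only sketch of that step, with a placeholder hash value:

[source,java]
----
public class HashBucketSketch {
  public static void main(String[] args) {
    // Stand-in for the Murmur2 hash of the encoded hash columns. As a signed
    // Java long this value is negative, which is exactly why a plain % would
    // yield a negative "bucket".
    long hash = 0xDEADBEEFCAFEBABEL;
    int numBuckets = 8;

    // Equivalent of Guava's UnsignedLongs.remainder(hash, numBuckets).
    int bucket = (int) Long.remainderUnsigned(hash, numBuckets);
    System.out.println(bucket); // always in [0, numBuckets)
  }
}
----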
+ * + * @param row the row containing hash schema columns + * @param hashSchema the hash schema + * @return the hash bucket of the row + */ + public static int getHashBucket(PartialRow row, HashBucketSchema hashSchema) { + ByteVec buf = ByteVec.create(); + encodeColumns(row, hashSchema.getColumnIds(), buf); + long hash = Murmur2.hash64(buf.data(), buf.len(), hashSchema.getSeed()); + return (int) UnsignedLongs.remainder(hash, hashSchema.getNumBuckets()); + } + + /** + * Encodes the provided row into a partition key according to the partition schema. + * + * @param row the row to encode + * @param partitionSchema the partition schema describing the table's partitioning + * @return an encoded partition key + */ + public static byte[] encodePartitionKey(PartialRow row, PartitionSchema partitionSchema) { + ByteVec rangeBuf = ByteVec.create(); + encodeColumns(row, partitionSchema.getRangeSchema().getColumnIds(), rangeBuf); + + // Get the hash bucket schema for the range. + final List hashSchemas = + partitionSchema.getHashSchemaForRange(rangeBuf.toArray()); + ByteVec buf = ByteVec.create(); + for (final HashBucketSchema hashSchema : hashSchemas) { + encodeHashBucket(getHashBucket(row, hashSchema), buf); + } + + buf.append(rangeBuf); + return buf.toArray(); + } + + /** + * Encodes the provided row into a range partition key. + * + * @param row the row to encode + * @param rangeSchema the range partition schema + * @return the encoded range partition key + */ + public static byte[] encodeRangePartitionKey(PartialRow row, + PartitionSchema.RangeSchema rangeSchema) { + ByteVec buf = ByteVec.create(); + encodeColumns(row, rangeSchema.getColumnIds(), buf); + return buf.toArray(); + } + + /** + * Encodes a sequence of columns from the row. + * @param row the row containing the columns to encode + * @param columnIds the IDs of each column to encode + */ + private static void encodeColumns(PartialRow row, List columnIds, ByteVec buf) { + for (int i = 0; i < columnIds.size(); i++) { + boolean isLast = i + 1 == columnIds.size(); + encodeColumn(row, row.getSchema().getColumnIndex(columnIds.get(i)), isLast, buf); + } + } + + /** + * Encodes a single column of a row into the output buffer. + * @param row the row being encoded + * @param columnIdx the column index of the column to encode + * @param isLast whether the column is the last component of the key + */ + private static void encodeColumn(PartialRow row, + int columnIdx, + boolean isLast, + ByteVec buf) { + final Schema schema = row.getSchema(); + final ColumnSchema column = schema.getColumnByIndex(columnIdx); + if (!row.isSet(columnIdx)) { + throw new IllegalStateException(String.format("Primary key column %s is not set", + column.getName())); + } + if (column.getType().isFixedSize()) { + encodeSignedInt( + row.getRowAlloc(), + schema.getColumnOffset(columnIdx), + column.getTypeSize(), + buf); + } else { + encodeBinary(row.getVarLengthData().get(columnIdx), isLast, buf); + } + } + + /** + * Encodes a variable length binary value into the output buffer. + * @param value the value to encode + * @param isLast whether the value is the final component in the key + * @param buf the output buffer + */ + private static void encodeBinary(ByteBuffer value, boolean isLast, ByteVec buf) { + value.reset(); + + // TODO find a way to not have to read byte-by-byte that doesn't require extra copies. This is + // especially slow now that users can pass direct byte buffers. 
+ while (value.hasRemaining()) { + byte currentByte = value.get(); + buf.push(currentByte); + if (!isLast && currentByte == 0x00) { + // If we're a middle component of a composite key, we need to add a \x00 + // at the end in order to separate this component from the next one. However, + // if we just did that, we'd have issues where a key that actually has + // \x00 in it would compare wrong, so we have to instead add \x00\x00, and + // encode \x00 as \x00\x01. -- key_encoder.h + buf.push((byte) 0x01); + } + } + + if (!isLast) { + buf.push((byte) 0x00); + buf.push((byte) 0x00); + } + } + + /** + * Encodes a signed integer into the output buffer + * + * @param value an array containing the little-endian encoded integer + * @param offset the offset of the value into the value array + * @param len the width of the value + * @param buf the output buffer + */ + private static void encodeSignedInt(byte[] value, + int offset, + int len, + ByteVec buf) { + // Picking the first byte because big endian. + byte lastByte = value[offset + (len - 1)]; + lastByte = Bytes.xorLeftMostBit(lastByte); + buf.push(lastByte); + for (int i = len - 2; i >= 0; i--) { + buf.push(value[offset + i]); + } + } + + /** + * Encodes a hash bucket into the buffer. + * @param bucket the bucket + * @param buf the buffer + */ + public static void encodeHashBucket(int bucket, ByteVec buf) { + buf.append(Ints.toByteArray(bucket)); + } + + /** + * Decodes a primary key into a row + * + * @param schema the table schema + * @param key the encoded key + * @return the decoded primary key as a row + */ + public static PartialRow decodePrimaryKey(Schema schema, byte[] key) { + PartialRow row = schema.newPartialRow(); + ByteBuffer buf = ByteBuffer.wrap(key); + buf.order(ByteOrder.BIG_ENDIAN); + + for (int idx = 0; idx < schema.getPrimaryKeyColumnCount(); idx++) { + decodeColumn(buf, row, idx, idx + 1 == schema.getPrimaryKeyColumnCount()); + } + + if (buf.hasRemaining()) { + throw new IllegalArgumentException("Unable to decode all primary key bytes"); + } + return row; + } + + /** + * Decodes a partition key into a list of hash buckets and range key + * + * @param schema the schema of the table + * @param partitionSchema the partition schema of the table + * @param key the encoded partition key + * @return the decoded buckets and range key + */ + public static Pair, PartialRow> decodePartitionKey(Schema schema, + PartitionSchema partitionSchema, + byte[] key) { + ByteBuffer buf = ByteBuffer.wrap(key); + buf.order(ByteOrder.BIG_ENDIAN); + + final List hashSchemas = partitionSchema.getHashSchemaForRange(key); + List buckets = new ArrayList<>(); + for (int i = 0; i < hashSchemas.size(); i++) { + if (buf.hasRemaining()) { + buckets.add(buf.getInt()); + } else { + buckets.add(0); + } + } + + return new Pair<>(buckets, decodeRangePartitionKey(schema, partitionSchema, buf)); + } + + /** + * Decodes a range partition key into a partial row. + * + * @param schema the schema of the table + * @param partitionSchema the partition schema of the table + * @param key the encoded range partition key + * @return the decoded range key + */ + public static PartialRow decodeRangePartitionKey(Schema schema, + PartitionSchema partitionSchema, + byte[] key) { + ByteBuffer buf = ByteBuffer.wrap(key); + buf.order(ByteOrder.BIG_ENDIAN); + return decodeRangePartitionKey(schema, partitionSchema, buf); + } + + /** + * Decodes a range partition key into a partial row. 
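A JDK-only sketch of the fixed-size integer encoding used above: flipping the sign bit and writing big-endian makes byte-wise unsigned comparison of encoded keys agree with signed integer order, and decoding is the same XOR applied in reverse:

[source,java]
----
import java.nio.ByteBuffer;

public class SignedKeyOrderSketch {
  // Key-encode an INT32 column: flip the sign bit, write big-endian.
  static byte[] encodeInt32(int v) {
    return ByteBuffer.allocate(4).putInt(v ^ Integer.MIN_VALUE).array();
  }

  // Lexicographic comparison over unsigned bytes.
  static int compareUnsigned(byte[] a, byte[] b) {
    for (int i = 0; i < a.length; i++) {
      int c = Integer.compare(a[i] & 0xFF, b[i] & 0xFF);
      if (c != 0) {
        return c;
      }
    }
    return 0;
  }

  public static void main(String[] args) {
    // -5 < 3, and the encodings compare the same way byte for byte.
    System.out.println(compareUnsigned(encodeInt32(-5), encodeInt32(3)) < 0); // true
    // Decoding applies the same XOR to the big-endian value.
    System.out.println(ByteBuffer.wrap(encodeInt32(-5)).getInt() ^ Integer.MIN_VALUE); // -5
  }
}
----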
+ * + * @param schema the schema of the table + * @param partitionSchema the partition schema of the table + * @param buf the encoded range partition key + * @return the decoded range key + */ + private static PartialRow decodeRangePartitionKey(Schema schema, + PartitionSchema partitionSchema, + ByteBuffer buf) { + PartialRow row = schema.newPartialRow(); + Iterator rangeIds = partitionSchema.getRangeSchema().getColumnIds().iterator(); + while (rangeIds.hasNext()) { + int idx = schema.getColumnIndex(rangeIds.next()); + if (buf.hasRemaining()) { + decodeColumn(buf, row, idx, !rangeIds.hasNext()); + } else { + row.setMin(idx); + } + } + + if (buf.hasRemaining()) { + throw new IllegalArgumentException("Unable to decode all partition key bytes"); + } + return row; + } + + /** + * Decoded a key-encoded column into a row. + * + * @param buf the buffer containing the column + * @param row the row to set the column value in + * @param idx the index of the column to decode + * @param isLast whether the column is the last column in the key + */ + private static void decodeColumn(ByteBuffer buf, PartialRow row, int idx, boolean isLast) { + Schema schema = row.getSchema(); + ColumnSchema column = schema.getColumnByIndex(idx); + switch (column.getType()) { + case INT8: + row.addByte(idx, (byte) (buf.get() ^ Byte.MIN_VALUE)); + break; + case INT16: + row.addShort(idx, (short) (buf.getShort() ^ Short.MIN_VALUE)); + break; + case DATE: { + int days = buf.getInt() ^ Integer.MIN_VALUE; + row.addDate(idx, DateUtil.epochDaysToSqlDate(days)); + break; + } + case INT32: + row.addInt(idx, buf.getInt() ^ Integer.MIN_VALUE); + break; + case INT64: + case UNIXTIME_MICROS: + row.addLong(idx, buf.getLong() ^ Long.MIN_VALUE); + break; + case BINARY: { + byte[] binary = decodeBinaryColumn(buf, isLast); + row.addBinary(idx, binary); + break; + } + case VARCHAR: { + byte[] binary = decodeBinaryColumn(buf, isLast); + row.addVarchar(idx, new String(binary, StandardCharsets.UTF_8)); + break; + } + case STRING: { + byte[] binary = decodeBinaryColumn(buf, isLast); + row.addStringUtf8(idx, binary); + break; + } + case DECIMAL: { + int scale = column.getTypeAttributes().getScale(); + int size = column.getTypeSize(); + switch (size) { + case DecimalUtil.DECIMAL32_SIZE: + int intVal = buf.getInt() ^ Integer.MIN_VALUE; + row.addDecimal(idx, BigDecimal.valueOf(intVal, scale)); + break; + case DecimalUtil.DECIMAL64_SIZE: + long longVal = buf.getLong() ^ Long.MIN_VALUE; + row.addDecimal(idx, BigDecimal.valueOf(longVal, scale)); + break; + case DecimalUtil.DECIMAL128_SIZE: + byte[] bytes = new byte[size]; + buf.get(bytes); + BigInteger bigIntVal = new BigInteger(bytes).xor(MIN_VALUE_128); + row.addDecimal(idx, new BigDecimal(bigIntVal, scale)); + break; + default: + throw new IllegalArgumentException("Unsupported decimal type size: " + size); + } + break; + } + default: + throw new IllegalArgumentException(String.format( + "The column type %s is not a valid key component type", + schema.getColumnByIndex(idx).getType())); + } + } + + /** + * Decode a binary key column. + * + * @param key the key bytes + * @param isLast whether the column is the final column in the key. + * @return the binary value. 
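A small sketch of the escaping scheme applied by encodeBinary() above and undone by decodeBinaryColumn() below, with placeholder input bytes:

[source,java]
----
import java.io.ByteArrayOutputStream;
import java.util.Arrays;

public class BinaryKeyEscapeSketch {
  // Non-terminal components escape embedded 0x00 as 0x00 0x01 and are terminated
  // by 0x00 0x00, so an embedded zero byte can never be mistaken for the separator.
  static byte[] encode(byte[] value, boolean isLast) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (byte b : value) {
      out.write(b);
      if (!isLast && b == 0x00) {
        out.write(0x01);
      }
    }
    if (!isLast) {
      out.write(0x00);
      out.write(0x00);
    }
    return out.toByteArray();
  }

  public static void main(String[] args) {
    byte[] withNull = new byte[] {'a', 0x00, 'b'};
    System.out.println(Arrays.toString(encode(withNull, false))); // [97, 0, 1, 98, 0, 0]
    System.out.println(Arrays.toString(encode(withNull, true)));  // [97, 0, 98] (stored verbatim)
  }
}
----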
+ */ + private static byte[] decodeBinaryColumn(ByteBuffer key, boolean isLast) { + if (isLast) { + byte[] bytes = Arrays.copyOfRange(key.array(), + key.arrayOffset() + key.position(), + key.arrayOffset() + key.limit()); + key.position(key.limit()); + return bytes; + } + + // When encoding a binary column that is not the final column in the key, a + // 0x0000 separator is used to retain lexicographic comparability. Null + // bytes in the input are escaped as 0x0001. + ByteVec buf = ByteVec.withCapacity(key.remaining()); + for (int i = key.position(); i < key.limit(); i++) { + if (key.get(i) == 0) { + switch (key.get(i + 1)) { + case 0: { + buf.append(key.array(), + key.arrayOffset() + key.position(), + i - key.position()); + key.position(i + 2); + return buf.toArray(); + } + case 1: { + buf.append(key.array(), + key.arrayOffset() + key.position(), + i + 1 - key.position()); + i++; + key.position(i + 1); + break; + } + default: throw new IllegalArgumentException("Unexpected binary sequence"); + } + } + } + + buf.append(key.array(), + key.arrayOffset() + key.position(), + key.remaining()); + key.position(key.limit()); + return buf.toArray(); + } + + /** + * Debug formats a partition key range. + * + * @param schema the table schema + * @param partitionSchema the table partition schema + * @param lowerBound the lower bound encoded partition key + * @param upperBound the upper bound encoded partition key + * @return a debug string which describes the partition key range. + */ + public static String formatPartitionKeyRange(Schema schema, + PartitionSchema partitionSchema, + byte[] lowerBound, + byte[] upperBound) { + if (partitionSchema.getRangeSchema().getColumnIds().isEmpty() && + partitionSchema.getHashBucketSchemas().isEmpty() && + partitionSchema.getRangesWithHashSchemas().isEmpty()) { + assert lowerBound.length == 0 && upperBound.length == 0; + return ""; + } + + // Even though we parse hash buckets for the upper and lower bound partition + // keys, we only use the lower bound set. Upper bound partition keys are + // exclusive, so the upper bound partition key may include an incremented + // bucket in the last position. 
+ Pair, PartialRow> lower = decodePartitionKey(schema, partitionSchema, lowerBound); + Pair, PartialRow> upper = decodePartitionKey(schema, partitionSchema, upperBound); + + StringBuilder sb = new StringBuilder(); + + List hashBuckets = lower.getFirst(); + if (!hashBuckets.isEmpty()) { + sb.append("hash-partition-buckets: "); + sb.append(hashBuckets); + } + + if (!partitionSchema.getRangeSchema().getColumnIds().isEmpty()) { + if (!hashBuckets.isEmpty()) { + sb.append(", "); + } + + List idxs = new ArrayList<>(); + for (int id : partitionSchema.getRangeSchema().getColumnIds()) { + idxs.add(schema.getColumnIndex(id)); + } + + sb.append("range-partition: ["); + if (lowerBound.length > 4 * hashBuckets.size()) { + sb.append('('); + lower.getSecond().appendDebugString(idxs, sb); + sb.append(')'); + } else { + sb.append(""); + } + sb.append(", "); + if (upperBound.length > 4 * hashBuckets.size()) { + sb.append('('); + upper.getSecond().appendDebugString(idxs, sb); + sb.append(')'); + } else { + sb.append(""); + } + sb.append(')'); + } + return sb.toString(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeyRange.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeyRange.java new file mode 100644 index 0000000000..172129f875 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KeyRange.java @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; + +/* + * Class used to represent primary key range in tablet. + */ +@InterfaceAudience.Private +class KeyRange { + private byte[] primaryKeyStart; + private byte[] primaryKeyEnd; + private long dataSizeBytes; + private LocatedTablet tablet; + + /** + * Create a new key range [primaryKeyStart, primaryKeyEnd). + * @param tablet the tablet which the key range belongs to, cannot be null + * @param primaryKeyStart the encoded primary key where to start in the key range (inclusive) + * @param primaryKeyEnd the encoded primary key where to stop in the key range (exclusive) + * @param dataSizeBytes the estimated data size of the key range. 
+ */ + public KeyRange(LocatedTablet tablet, + byte[] primaryKeyStart, + byte[] primaryKeyEnd, + long dataSizeBytes) { + Preconditions.checkNotNull(tablet); + this.tablet = tablet; + this.primaryKeyStart = primaryKeyStart; + this.primaryKeyEnd = primaryKeyEnd; + this.dataSizeBytes = dataSizeBytes; + } + + /** + * @return the start primary key + */ + public byte[] getPrimaryKeyStart() { + return primaryKeyStart; + } + + /** + * @return the end primary key + */ + public byte[] getPrimaryKeyEnd() { + return primaryKeyEnd; + } + + /** + * @return the located tablet + */ + public LocatedTablet getTablet() { + return tablet; + } + + /** + * @return the start partition key + */ + public byte[] getPartitionKeyStart() { + return tablet.getPartition().getPartitionKeyStart(); + } + + /** + * @return the end partition key + */ + public byte[] getPartitionKeyEnd() { + return tablet.getPartition().getPartitionKeyEnd(); + } + + /** + * @return the estimated data size of the key range + */ + public long getDataSizeBytes() { + return dataSizeBytes; + } + + @Override + public String toString() { + return String.format("[%s, %s), %s, %s", + primaryKeyStart == null || primaryKeyStart.length == 0 ? + "" : Bytes.hex(primaryKeyStart), + primaryKeyEnd == null || primaryKeyEnd.length == 0 ? + "" : Bytes.hex(primaryKeyEnd), + String.valueOf(dataSizeBytes), + tablet.toString()); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduClient.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduClient.java new file mode 100644 index 0000000000..6d8e7f82b5 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduClient.java @@ -0,0 +1,773 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.security.cert.CertificateException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executor; + +import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; +import com.stumbleupon.async.Deferred; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Schema; +import org.apache.kudu.master.Master.TableIdentifierPB; + +/** + * A synchronous and thread-safe client for Kudu. + *
+ * This class acts as a wrapper around {@link AsyncKuduClient} which contains all the relevant + * documentation. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduClient implements AutoCloseable { + + private static final Logger LOG = LoggerFactory.getLogger(KuduClient.class); + public static final long NO_TIMESTAMP = -1; + + @InterfaceAudience.LimitedPrivate("Test") + final AsyncKuduClient asyncClient; + + KuduClient(AsyncKuduClient asyncClient) { + this.asyncClient = asyncClient; + } + + /** + * Updates the last timestamp received from a server. Used for CLIENT_PROPAGATED + * external consistency. + * + * @param lastPropagatedTimestamp the last timestamp received from a server. + */ + public void updateLastPropagatedTimestamp(long lastPropagatedTimestamp) { + asyncClient.updateLastPropagatedTimestamp(lastPropagatedTimestamp); + } + + /** + * Returns the last timestamp received from a server. Used for CLIENT_PROPAGATED + * external consistency. Note that the returned timestamp is encoded and cannot be + * interpreted as a raw timestamp. + * + * @return a long indicating the specially-encoded last timestamp received from a server + */ + public long getLastPropagatedTimestamp() { + return asyncClient.getLastPropagatedTimestamp(); + } + + /** + * Checks if the client received any timestamps from a server. Used for + * CLIENT_PROPAGATED external consistency. + * + * @return true if last propagated timestamp has been set + */ + public boolean hasLastPropagatedTimestamp() { + return asyncClient.hasLastPropagatedTimestamp(); + } + + /** + * Returns a string representation of this client's location. If this + * client was not assigned a location, returns the empty string. + * + * @return a string representation of this client's location + */ + public String getLocationString() { + return asyncClient.getLocationString(); + } + + /** + * Returns the ID of the cluster that this client is connected to. + * It will be an empty string if the client is not connected or + * the client is connected to a cluster that doesn't support + * cluster IDs. + * + * @return the ID of the cluster that this client is connected to + */ + public String getClusterId() { + return asyncClient.getClusterId(); + } + + /** + * Returns the unique client id assigned to this client. + * @return the unique client id assigned to this client. + */ + String getClientId() { + return asyncClient.getClientId(); + } + + /** + * Returns the Hive Metastore configuration of the cluster. + * + * @return the Hive Metastore configuration of the cluster + * @throws KuduException if the configuration can not be retrieved + */ + @InterfaceAudience.LimitedPrivate("Impala") + @InterfaceStability.Unstable + public HiveMetastoreConfig getHiveMetastoreConfig() throws KuduException { + return joinAndHandleException(asyncClient.getHiveMetastoreConfig()); + } + + /** + * Create a table on the cluster with the specified name, schema, and table configurations. 
+ * @param name the table's name + * @param schema the table's schema + * @param builder a builder containing the table's configurations + * @return an object to communicate with the created table + * @throws KuduException if anything went wrong + */ + public KuduTable createTable(String name, Schema schema, CreateTableOptions builder) + throws KuduException { + Deferred d = asyncClient.createTable(name, schema, builder); + return joinAndHandleException(d); + } + + /** + * Waits for all of the tablets in a table to be created, or until the + * default admin operation timeout is reached. + * @param name the table's name + * @return true if the table is done being created, or false if the default + * admin operation timeout was reached. + * @throws KuduException for any error returned by sending RPCs to the master + * (e.g. the table does not exist) + */ + public boolean isCreateTableDone(String name) throws KuduException { + TableIdentifierPB.Builder table = TableIdentifierPB.newBuilder().setTableName(name); + Deferred d = asyncClient.getDelayedIsCreateTableDoneDeferred(table, null, null); + try { + joinAndHandleException(d); + } catch (KuduException e) { + if (e.getStatus().isTimedOut()) { + return false; + } + throw e; + } + return true; + } + + /** + * Delete a table on the cluster with the specified name. + * The deleted table may turn to soft-deleted status with the flag + * default_deleted_table_reserve_seconds set to nonzero on the master side. + * + * @param name the table's name + * @return an rpc response object + * @throws KuduException if anything went wrong + */ + public DeleteTableResponse deleteTable(String name) throws KuduException { + Deferred d = asyncClient.deleteTable(name); + return joinAndHandleException(d); + } + + /** + * SoftDelete a table on the cluster with the specified name, the table will be + * reserved for reserveSeconds before being purged. + * @param name the table's name + * @param reserveSeconds the soft deleted table to be alive time + * @return an rpc response object + * @throws KuduException if anything went wrong + */ + public DeleteTableResponse deleteTable(String name, + int reserveSeconds) throws KuduException { + Deferred d = asyncClient.deleteTable(name, reserveSeconds); + return joinAndHandleException(d); + } + + /** + * Recall a deleted table on the cluster with the specified table id + * @param id the table's id + * @return an rpc response object + * @throws KuduException if anything went wrong + */ + public RecallDeletedTableResponse recallDeletedTable(String id) throws KuduException { + Deferred d = asyncClient.recallDeletedTable(id); + return joinAndHandleException(d); + } + + /** + * Recall a deleted table on the cluster with the specified table id + * and give the recalled table the new table name + * @param id the table's id + * @param newTableName the recalled table's new name + * @return an rpc response object + * @throws KuduException if anything went wrong + */ + public RecallDeletedTableResponse recallDeletedTable(String id, + String newTableName) throws KuduException { + Deferred d = asyncClient.recallDeletedTable(id, + newTableName); + return joinAndHandleException(d); + } + + /** + * Alter a table on the cluster as specified by the builder. 
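A sketch of the create-and-wait pattern built from the methods above, using the schema and table-options builders as found in upstream Kudu; the master address, table name, columns, replication factor, and hash partitioning are all placeholders:

[source,java]
----
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.Type;
import org.apache.kudu.client.CreateTableOptions;
import org.apache.kudu.client.KuduClient;

public class CreateTableSketch {
  public static void main(String[] args) throws Exception {
    try (KuduClient client =
             new KuduClient.KuduClientBuilder("master.example.com:7051").build()) {
      List<ColumnSchema> columns = new ArrayList<>();
      columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT64).key(true).build());
      columns.add(new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).nullable(true).build());
      Schema schema = new Schema(columns);

      CreateTableOptions options = new CreateTableOptions()
          .addHashPartitions(Collections.singletonList("key"), 4) // 4 hash buckets on "key"
          .setNumReplicas(3); // assumes at least 3 tablet servers
      client.createTable("metrics", schema, options);

      // createTable() can return before every tablet is fully created; poll until done.
      while (!client.isCreateTableDone("metrics")) {
        Thread.sleep(250);
      }
    }
  }
}
----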
+ * @param name the table's name (old name if the table is being renamed) + * @param ato the alter table options + * @return an rpc response object + * @throws KuduException if anything went wrong + */ + public AlterTableResponse alterTable(String name, AlterTableOptions ato) throws KuduException { + Deferred d = asyncClient.alterTable(name, ato); + return joinAndHandleException(d); + } + + /** + * Waits for all of the tablets in a table to be altered, or until the + * default admin operation timeout is reached. + * @param name the table's name + * @return true if the table is done being altered, or false if the default + * admin operation timeout was reached. + * @throws KuduException for any error returned by sending RPCs to the master + * (e.g. the table does not exist) + */ + public boolean isAlterTableDone(String name) throws KuduException { + TableIdentifierPB.Builder table = TableIdentifierPB.newBuilder().setTableName(name); + Deferred d = + asyncClient.getDelayedIsAlterTableDoneDeferred(table, null, null); + try { + joinAndHandleException(d); + } catch (KuduException e) { + if (e.getStatus().isTimedOut()) { + return false; + } + throw e; + } + return true; + } + + /** + * Get the list of running tablet servers. + * @return a list of tablet servers + * @throws KuduException if anything went wrong + */ + public ListTabletServersResponse listTabletServers() throws KuduException { + Deferred d = asyncClient.listTabletServers(); + return joinAndHandleException(d); + } + + /** + * Get the list of all the regular tables. + * @return a list of all the regular tables + * @throws KuduException if anything went wrong + */ + public ListTablesResponse getTablesList() throws KuduException { + return getTablesList(null); + } + + /** + * Get a list of regular table names. Passing a null filter returns all the tables. + * When a filter is specified, it only returns tables that satisfy a substring match. + * @param nameFilter an optional table name filter + * @return a deferred that contains the list of table names + * @throws KuduException if anything went wrong + */ + public ListTablesResponse getTablesList(String nameFilter) throws KuduException { + Deferred d = asyncClient.getTablesList(nameFilter, false); + return joinAndHandleException(d); + } + + /** + * Get the list of all the soft deleted tables. + * @return a list of all the soft deleted tables + * @throws KuduException if anything went wrong + */ + public ListTablesResponse getSoftDeletedTablesList() throws KuduException { + return getSoftDeletedTablesList(null); + } + + /** + * Get list of soft deleted table names. Passing a null filter returns all the tables. + * When a filter is specified, it only returns tables that satisfy a substring match. + * @param nameFilter an optional table name filter + * @return a deferred that contains the list of table names + * @throws KuduException if anything went wrong + */ + public ListTablesResponse getSoftDeletedTablesList(String nameFilter) throws KuduException { + Deferred d = asyncClient.getTablesList(nameFilter, true); + return joinAndHandleException(d); + } + + /** + * Get table's statistics from master. + * @param name the table's name + * @return the statistics of table + * @throws KuduException if anything went wrong + */ + public KuduTableStatistics getTableStatistics(String name) throws KuduException { + Deferred d = asyncClient.getTableStatistics(name); + return joinAndHandleException(d); + } + + /** + * Test if a table exists. 
+ * @param name a non-null table name + * @return true if the table exists, else false + * @throws KuduException if anything went wrong + */ + public boolean tableExists(String name) throws KuduException { + Deferred d = asyncClient.tableExists(name); + return joinAndHandleException(d); + } + + /** + * Open the table with the given id. + * + * @param id the id of the table to open + * @return a KuduTable if the table exists + * @throws KuduException if anything went wrong + */ + KuduTable openTableById(final String id) throws KuduException { + Deferred d = asyncClient.openTableById(id); + return joinAndHandleException(d); + } + + /** + * Open the table with the given name. + * + * New range partitions created by other clients will immediately be available + * after opening the table. + * + * @param name table to open + * @return a KuduTable if the table exists + * @throws KuduException if anything went wrong + */ + public KuduTable openTable(final String name) throws KuduException { + Deferred d = asyncClient.openTable(name); + return joinAndHandleException(d); + } + + /** + * Create a new session for interacting with the cluster. + * User is responsible for destroying the session object. + * This is a fully local operation (no RPCs or blocking). + * @return a synchronous wrapper around KuduSession. + */ + public KuduSession newSession() { + AsyncKuduSession session = asyncClient.newSession(); + return new KuduSession(session); + } + + /** + * Start a new multi-row distributed transaction. + *
+ * Start a new multi-row transaction and return a handle for the transactional + * object to manage the newly started transaction. Under the hood, this makes + * an RPC call to the Kudu cluster and registers a newly created transaction + * in the system. This call is blocking. + * + * @return a handle to the newly started transaction in case of success + */ + public KuduTransaction newTransaction() throws KuduException { + KuduTransaction txn = new KuduTransaction(asyncClient); + txn.begin(); + return txn; + } + + /** + * Check if statistics collection is enabled for this client. + * @return true if it is enabled, else false + */ + public boolean isStatisticsEnabled() { + return asyncClient.isStatisticsEnabled(); + } + + /** + * Get the statistics object of this client. + * + * @return this client's Statistics object + * @throws IllegalStateException thrown if statistics collection has been disabled + */ + public Statistics getStatistics() { + return asyncClient.getStatistics(); + } + + /** + * Creates a new {@link KuduScanner.KuduScannerBuilder} for a particular table. + * @param table the table you intend to scan. + * The string is assumed to use the platform's default charset. + * @return a new scanner builder for the table + */ + public KuduScanner.KuduScannerBuilder newScannerBuilder(KuduTable table) { + return new KuduScanner.KuduScannerBuilder(asyncClient, table); + } + + /** + * Creates a new {@link KuduScanToken.KuduScanTokenBuilder} for a particular table. + * Used for integrations with compute frameworks. + * @param table the table you intend to scan + * @return a new scan token builder for the table + */ + public KuduScanToken.KuduScanTokenBuilder newScanTokenBuilder(KuduTable table) { + return new KuduScanToken.KuduScanTokenBuilder(asyncClient, table); + } + + /** + * Analogous to {@link #shutdown()}. + * @throws KuduException if an error happens while closing the connections + */ + @Override + public void close() throws KuduException { + try { + asyncClient.close(); + } catch (Exception e) { + throw KuduException.transformException(e); + } + } + + /** + * Performs a graceful shutdown of this instance. + * @throws KuduException if anything went wrong + */ + public void shutdown() throws KuduException { + Deferred> d = asyncClient.shutdown(); + joinAndHandleException(d); + } + + /** + * Export serialized authentication data that may be passed to a different + * client instance and imported to provide that client the ability to connect + * to the cluster. + */ + @InterfaceStability.Unstable + public byte[] exportAuthenticationCredentials() throws KuduException { + return joinAndHandleException(asyncClient.exportAuthenticationCredentials()); + } + + /** + * Import data allowing this client to authenticate to the cluster. + * This will typically be used before making any connections to servers + * in the cluster. + * + * Note that, if this client has already been used by one user, this + * method cannot be used to switch authenticated users. Attempts to + * do so have undefined results, and may throw an exception. + * + * @param authnData then authentication data provided by a prior call to + * {@link #exportAuthenticationCredentials()} + */ + @InterfaceStability.Unstable + public void importAuthenticationCredentials(byte[] authnData) { + asyncClient.importAuthenticationCredentials(authnData); + } + + /** + * Mark the given CA certificates (in DER format) as the trusted ones for the + * client. The provided list of certificates replaces any previously set ones. 
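A sketch of handing credentials from an authenticated client to another client instance using the two methods above; the master address is a placeholder, and in practice the second client would usually run in a different process (for example, a Spark executor):

[source,java]
----
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;

public class CredentialHandoffSketch {
  public static void main(String[] args) throws KuduException {
    try (KuduClient authenticated =
             new KuduClient.KuduClientBuilder("master.example.com:7051").build();
         KuduClient worker =
             new KuduClient.KuduClientBuilder("master.example.com:7051").build()) {
      byte[] credentials = authenticated.exportAuthenticationCredentials();
      // Import before the second client makes any connections to the cluster.
      worker.importAuthenticationCredentials(credentials);
      System.out.println(worker.getTablesList().getTablesList());
    }
  }
}
----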
+ * + * @param certificates list of certificates to trust (in DER format) + * @throws CertificateException if any of the specified certificates were invalid + */ + @InterfaceStability.Unstable + public void trustedCertificates(List certificates) throws CertificateException { + asyncClient.trustedCertificates(certificates); + } + + /** + * Set JWT (JSON Web Token) to authenticate the client to a server. + *
+ * @note If {@link #importAuthenticationCredentials(byte[] authnData)} and + * this method are called on the same object, the JWT provided with this call + * overrides the corresponding JWT that comes as a part of the imported + * authentication credentials (if present). + * + * @param jwt The JSON web token to set. + */ + @InterfaceStability.Unstable + public void jwt(String jwt) { + asyncClient.jwt(jwt); + } + + /** + * Get the timeout used for operations on sessions and scanners. + * @return a timeout in milliseconds + */ + public long getDefaultOperationTimeoutMs() { + return asyncClient.getDefaultOperationTimeoutMs(); + } + + /** + * Get the timeout used for admin operations. + * @return a timeout in milliseconds + */ + public long getDefaultAdminOperationTimeoutMs() { + return asyncClient.getDefaultAdminOperationTimeoutMs(); + } + + /** + * @return the list of master addresses, stringified using commas to separate + * them + */ + public String getMasterAddressesAsString() { + return asyncClient.getMasterAddressesAsString(); + } + + /** + * Sends a request to the master to check if the cluster supports ignore operations, including + * InsertIgnore, UpdateIgnore and DeleteIgnore operations. + * @return true if the cluster supports ignore operations + */ + @InterfaceAudience.Private + public boolean supportsIgnoreOperations() throws KuduException { + return joinAndHandleException(asyncClient.supportsIgnoreOperations()); + } + + /** + * @return a HostAndPort describing the current leader master + * @throws KuduException if a leader master could not be found in time + */ + @InterfaceAudience.LimitedPrivate("Test") + public HostAndPort findLeaderMasterServer() throws KuduException { + // Consult the cache to determine the current leader master. + // + // If one isn't found, issue an RPC that retries until the leader master + // is discovered. We don't need the RPC's results; it's just a simple way to + // wait until a leader master is elected. + TableLocationsCache.Entry entry = asyncClient.getTableLocationEntry( + AsyncKuduClient.MASTER_TABLE_NAME_PLACEHOLDER, null); + if (entry == null) { + // If there's no leader master, this will time out and throw an exception. + listTabletServers(); + + entry = asyncClient.getTableLocationEntry( + AsyncKuduClient.MASTER_TABLE_NAME_PLACEHOLDER, null); + } + Preconditions.checkNotNull(entry); + Preconditions.checkState(!entry.isNonCoveredRange()); + ServerInfo info = entry.getTablet().getLeaderServerInfo(); + Preconditions.checkNotNull(info); + return info.getHostAndPort(); + } + + // Helper method to handle joining and transforming the Exception we receive. + static R joinAndHandleException(Deferred deferred) throws KuduException { + try { + return deferred.join(); + } catch (Exception e) { + throw KuduException.transformException(e); + } + } + + /** + * Builder class to use in order to connect to Kudu. + * All the parameters beyond those in the constructors are optional. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static final class KuduClientBuilder { + private AsyncKuduClient.AsyncKuduClientBuilder clientBuilder; + + /** + * Creates a new builder for a client that will connect to the specified masters. + * @param masterAddresses comma-separated list of "host:port" pairs of the masters + */ + public KuduClientBuilder(String masterAddresses) { + clientBuilder = new AsyncKuduClient.AsyncKuduClientBuilder(masterAddresses); + } + + /** + * Creates a new builder for a client that will connect to the specified masters. + * + *
Here are some examples of recognized formats:
+ *   - example.com
+ *   - example.com:80
+ *   - 192.0.2.1
+ *   - 192.0.2.1:80
+ *   - [2001:db8::1]
+ *   - [2001:db8::1]:80
+ *   - 2001:db8::1
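+ *
+ * A rough usage sketch, not taken from the original source (the master
+ * addresses and the timeout below are placeholders; asList is
+ * java.util.Arrays.asList):
+ *
+ *   KuduClient client = new KuduClient.KuduClientBuilder(
+ *       Arrays.asList("master-1:7051", "master-2:7051", "master-3:7051"))
+ *       .defaultAdminOperationTimeoutMs(60000)
+ *       .build();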
+ * + * @param masterAddresses list of master addresses + */ + public KuduClientBuilder(List masterAddresses) { + clientBuilder = new AsyncKuduClient.AsyncKuduClientBuilder(masterAddresses); + } + + /** + * Sets the default timeout used for administrative operations (e.g. createTable, deleteTable, + * etc). + * Optional. + * If not provided, defaults to 30s. + * A value of 0 disables the timeout. + * @param timeoutMs a timeout in milliseconds + * @return this builder + */ + public KuduClientBuilder defaultAdminOperationTimeoutMs(long timeoutMs) { + clientBuilder.defaultAdminOperationTimeoutMs(timeoutMs); + return this; + } + + /** + * Sets the default timeout used for user operations (using sessions and scanners). + * Optional. + * If not provided, defaults to 30s. + * A value of 0 disables the timeout. + * @param timeoutMs a timeout in milliseconds + * @return this builder + */ + public KuduClientBuilder defaultOperationTimeoutMs(long timeoutMs) { + clientBuilder.defaultOperationTimeoutMs(timeoutMs); + return this; + } + + /** + * Sets the default timeout used for connection negotiation. + * Optional. + * If not provided, defaults to 10s. + * @param timeoutMs a timeout in milliseconds + * @return this builder + */ + public KuduClientBuilder connectionNegotiationTimeoutMs(long timeoutMs) { + clientBuilder.connectionNegotiationTimeoutMs(timeoutMs); + return this; + } + + /** + * Socket read timeouts are no longer used in the Java client and have no effect. + * Setting this has no effect. + * @param timeoutMs a timeout in milliseconds + * @return this builder + * @deprecated socket read timeouts are no longer used + */ + @Deprecated + public KuduClientBuilder defaultSocketReadTimeoutMs(long timeoutMs) { + LOG.info("defaultSocketReadTimeoutMs is deprecated"); + return this; + } + + /** + * Disable this client's collection of statistics. + * Statistics are enabled by default. + * @return this builder + */ + public KuduClientBuilder disableStatistics() { + clientBuilder.disableStatistics(); + return this; + } + + /** + * @deprecated the bossExecutor is no longer used and will have no effect if provided + */ + @Deprecated + public KuduClientBuilder nioExecutors(Executor bossExecutor, Executor workerExecutor) { + clientBuilder.nioExecutors(bossExecutor, workerExecutor); + return this; + } + + /** + * Set the executor which will be used for the embedded Netty workers. + * + * Optional. + * If not provided, uses a simple cached threadpool. If workerExecutor is null, + * then such a thread pool will be used. + * Note: executor's max thread number must be greater or equal to corresponding + * worker count, or netty cannot start enough threads, and client will get stuck. + * If not sure, please just use CachedThreadPool. + */ + public KuduClientBuilder nioExecutor(Executor workerExecutor) { + clientBuilder.nioExecutor(workerExecutor); + return this; + } + + /** + * @deprecated the bossExecutor is no longer used and will have no effect if provided + */ + @Deprecated + public KuduClientBuilder bossCount(int bossCount) { + LOG.info("bossCount is deprecated"); + return this; + } + + /** + * Set the maximum number of worker threads. + * A worker thread performs non-blocking read and write for one or more + * Netty Channels in a non-blocking mode. + * + * Optional. + * If not provided, (2 * the number of available processors) is used. 
If + * this client instance will be used on a machine running many client + * instances, it may be wise to lower this count, for example to avoid + * resource limits, at the possible cost of some performance of this client + * instance. + */ + public KuduClientBuilder workerCount(int workerCount) { + clientBuilder.workerCount(workerCount); + return this; + } + + /** + * Set the SASL protocol name. + * SASL protocol name is used when connecting to a secure (Kerberos-enabled) + * cluster. It must match the servers' service principal name (SPN). + * + * Optional. + * If not provided, it will use the default SASL protocol name ("kudu"). + * @return this builder + */ + public KuduClientBuilder saslProtocolName(String saslProtocolName) { + clientBuilder.saslProtocolName(saslProtocolName); + return this; + } + + /** + * Require authentication for the connection to a remote server. + * + * If it's set to true, the client will require mutual authentication between + * the server and the client. If the server doesn't support authentication, + * or it's disabled, the client will fail to connect. + */ + public KuduClientBuilder requireAuthentication(boolean requireAuthentication) { + clientBuilder.requireAuthentication(requireAuthentication); + return this; + } + + /** + * Require encryption for the connection to a remote server. + * + * If it's set to REQUIRED or REQUIRED_LOOPBACK, the client will + * require encrypting the traffic between the server and the client. + * If the server doesn't support encryption, or if it's disabled, the + * client will fail to connect. + * + * Loopback connections are encrypted only if 'encryption_policy' is + * set to REQUIRE_LOOPBACK, or if it's required by the server. + * + * The default value is OPTIONAL, which allows connecting to servers without + * encryption as well, but it will still attempt to use it if the server + * supports it. + */ + public KuduClientBuilder encryptionPolicy(AsyncKuduClient.EncryptionPolicy encryptionPolicy) { + clientBuilder.encryptionPolicy(encryptionPolicy); + return this; + } + + /** + * Creates a new client that connects to the masters. + * Doesn't block and won't throw an exception if the masters don't exist. + * @return a new asynchronous Kudu client + */ + public KuduClient build() { + AsyncKuduClient client = clientBuilder.build(); + return new KuduClient(client); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduException.java new file mode 100644 index 0000000000..c9aa9a6f06 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduException.java @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.kudu.client; + +import java.io.IOException; + +import com.stumbleupon.async.DeferredGroupException; +import com.stumbleupon.async.TimeoutException; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * The parent class of all exceptions sent by the Kudu client. This is the only exception you will + * see if you're using the non-async API, such as {@link KuduSession} instead of + * {@link AsyncKuduSession}. + * + * Each instance of this class has a {@link Status} which gives more information about the error. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +@SuppressWarnings("serial") +public abstract class KuduException extends IOException { + private final Status status; + + /** + * Constructor. + * @param status object containing the reason for the exception + * trace. + */ + KuduException(Status status) { + super(status.getMessage()); + this.status = status; + } + + /** + * Constructor. + * @param status object containing the reason for the exception + * @param cause The exception that caused this one to be thrown. + */ + KuduException(Status status, Throwable cause) { + super(status.getMessage(), cause); + this.status = status; + } + + /** + * Get the Status object for this exception. + * @return a status object indicating the reason for the exception + */ + public Status getStatus() { + return status; + } + + /** + * When exceptions are thrown by the asynchronous Kudu client, the stack trace is + * typically deep within the internals of the Kudu client and/or Netty. + * Thus, when the synchronous Kudu client wraps and throws the exception, + * we suppress that stack trace and replace it with the stack trace of the user's + * calling thread. The original stack trace is added to the {@link KuduException} + * as a suppressed exception (see Throwable#addSuppressed(Throwable)) of + * this + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class OriginalException extends Throwable { + private OriginalException(Throwable e) { + super("Original asynchronous stack trace"); + setStackTrace(e.getStackTrace()); + } + } + + /** + * Inspects the given exception and transforms it into a KuduException. + * @param e generic exception we want to transform + * @return a KuduException that's easier to handle + */ + static KuduException transformException(Exception e) { + // The message may be null. + String message = e.getMessage() == null ? "" : e.getMessage(); + if (e instanceof KuduException) { + // The exception thrown inside the async code has a stack trace + // that doesn't correspond to where the user actually called + // some synchronous method. 
This can be very confusing for + // users, so we'll reset the stack trace back the call frame + // where we are transforming it. + e.addSuppressed(new OriginalException(e)); + StackTraceElement[] stack = new Exception().getStackTrace(); + e.setStackTrace(stack); + return (KuduException) e; + } else if (e instanceof DeferredGroupException) { + // The cause of a DeferredGroupException is the first exception it sees, we're just going to + // use it as our main exception. DGE doesn't let us see the other exceptions anyways. + Throwable cause = e.getCause(); + if (cause instanceof Exception) { + return transformException((Exception) cause); + } + // Else fall down into a generic exception at the end. + } else if (e instanceof TimeoutException) { + Status statusTimeout = Status.TimedOut(message); + return new NonRecoverableException(statusTimeout, e); + } else if (e instanceof InterruptedException) { + // Need to reset the interrupt flag since we caught it but aren't handling it. + Thread.currentThread().interrupt(); + + Status statusAborted = Status.Aborted(message); + return new NonRecoverableException(statusAborted, e); + } + Status status = Status.IOError(message); + return new NonRecoverableException(status, e); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduMetrics.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduMetrics.java new file mode 100644 index 0000000000..83cda05d17 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduMetrics.java @@ -0,0 +1,228 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static java.util.stream.Collectors.joining; + +import java.util.Comparator; + +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.composite.CompositeMeterRegistry; +import io.micrometer.core.instrument.config.MeterFilter; +import io.micrometer.core.instrument.config.NamingConvention; +import io.micrometer.core.instrument.search.Search; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A static utility class to contain constants and methods for working with + * Kudu Java client metrics. + * + * NOTE: The metrics are not considered public API yet. We should not expose + * micrometer objects/classes through any public interface or method, even + * when we do make them public. 
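+ *
+ * A rough sketch of how same-package code (for example, tests) might exercise
+ * the registry; this is illustrative only and not a public API:
+ *
+ *   KuduMetrics.setEnabled(true);
+ *   // ... run some client operations ...
+ *   double sent = KuduMetrics.totalCount(KuduMetrics.RPC_REQUESTS_METRIC);
+ *   KuduMetrics.setEnabled(false); // discards the recorded metrics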
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class KuduMetrics { + private static final Logger LOG = LoggerFactory.getLogger(KuduMetrics.class); + + public static final String[] EMPTY_TAGS = new String[]{}; + + // RPC Metrics + public static final KuduMetricId RPC_REQUESTS_METRIC = + new KuduMetricId("rpc.requests", "A count of the sent request RPCs", "requests"); + public static final KuduMetricId RPC_RETRIES_METRIC = + new KuduMetricId("rpc.retries", "A count of the retried request RPCs", "retries"); + public static final KuduMetricId RPC_RESPONSE_METRIC = + new KuduMetricId("rpc.responses", "A count of the RPC responses received", "responses"); + + // Common Tags + public static final String CLIENT_ID_TAG = "client.id"; + public static final String SERVER_ID_TAG = "server.id"; + public static final String SERVICE_NAME_TAG = "service.name"; + public static final String METHOD_NAME_TAG = "method.name"; + + // TODO(KUDU-3148): After extensive testing consider enabling metrics by default. + private static boolean enabled = false; + private static CompositeMeterRegistry registry = createDisabledRegistry(); + + /** + * This class is meant to be used statically. + */ + private KuduMetrics() { + } + + /** + * Enable or disable metric tracking. + * Disabling the metrics will discard any previously recorded metrics. + * + * @param enable If true, metric tracking is enabled. + */ + public static synchronized void setEnabled(boolean enable) { + if (enable && !enabled) { + CompositeMeterRegistry oldRegistry = registry; + registry = createRegistry(); + enabled = true; + oldRegistry.close(); + } else if (!enable && enabled) { + CompositeMeterRegistry oldRegistry = registry; + registry = createDisabledRegistry(); + enabled = false; + oldRegistry.close(); + } + } + + private static CompositeMeterRegistry createRegistry() { + CompositeMeterRegistry registry = new CompositeMeterRegistry(); + // This is the default naming convention that separates lowercase words + // with a '.' (dot) character. + registry.config().namingConvention(NamingConvention.dot); + // Use the minimal meter registry. Once this is used/useful for more than tests + // we may want to consider something more exposed such as JMX. + registry.add(new SimpleMeterRegistry()); + return registry; + } + + private static CompositeMeterRegistry createDisabledRegistry() { + CompositeMeterRegistry registry = createRegistry(); + // Add a filter to deny all meters. When a meter is used with this registry, + // the registry will return a NOOP version of that meter. Anything recorded + // to it is discarded immediately with minimal overhead. + registry.config().meterFilter(MeterFilter.deny()); + return registry; + } + + /** + * @return the total number of registered metrics. + */ + static int numMetrics() { + return registry.getMeters().size(); + } + + /** + * @param id the metric id + * @return the number of all the matching metrics. + */ + static int numMetrics(KuduMetricId id) { + return numMetrics(id, EMPTY_TAGS); + } + + /** + * @param id the metric id + * @param tags tags must be an even number of arguments representing key/value pairs of tags. + * @return the sum of all the matching metrics. + */ + static int numMetrics(KuduMetricId id, String... tags) { + return Search.in(registry).name(id.name).tags(tags).counters().size(); + } + + /** + * @param tags tags must be an even number of arguments representing key/value pairs of tags. + * @return the sum of all the matching metrics. + */ + static int numMetrics(String... 
tags) { + return Search.in(registry).tags(tags).counters().size(); + } + + /** + * Returns the counter meter for the given metric id and tags. + * If the meter is already registered, it will lookup the existing meter and + * return it. Otherwise it will register a new meter and return that. + * + * @param id the metric id + * @param tags tags must be an even number of arguments representing key/value pairs of tags. + * @return a counter + */ + static Counter counter(KuduMetricId id, String... tags) { + return Counter.builder(id.getName()) + .description(id.getDescription()) + .baseUnit(id.getUnit()) + .tags(tags) + .register(registry); + } + + /** + * @param id the metric id + * @return the sum of all the matching metrics. + */ + public static double totalCount(KuduMetricId id) { + return totalCount(id, EMPTY_TAGS); + } + + /** + * @param id the metric id + * @param tags tags must be an even number of arguments representing key/value pairs of tags. + * @return the sum of all the matching metrics. + */ + public static double totalCount(KuduMetricId id, String... tags) { + return Search.in(registry).name(id.name).tags(tags).counters().stream() + .mapToDouble(Counter::count).sum(); + } + + /** + * Logs the metric values at the INFO level one metric per line. + * The output format for each metric is: + * {=,...} : + */ + static void logMetrics() { + registry.getMeters().stream() + // Sort by id to ensure the same order each time. + .sorted(Comparator.comparing(m -> m.getId().toString())) + .forEach(m -> { + // Generate tags string as {k=v,...} + String tags = m.getId().getTags().stream() + .sorted() + .map(t -> t.getKey() + "=" + t.getValue()) + .collect(joining(",", "{", "}")); + String key = m.getId().getName() + " " + tags; + String value = "unknown"; + if (m instanceof Counter) { + value = ((Counter) m).count() + " " + m.getId().getBaseUnit(); + } + LOG.info(key + " : " + value); + }); + } + + private static class KuduMetricId { + private final String name; + private final String description; + private final String unit; + + private KuduMetricId(String name, String description, String unit) { + this.name = name; + this.description = description; + this.unit = unit; + } + + public String getName() { + return name; + } + + public String getDescription() { + return description; + } + + public String getUnit() { + return unit; + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduPartitioner.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduPartitioner.java new file mode 100644 index 0000000000..5f2e20f415 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduPartitioner.java @@ -0,0 +1,246 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeMap; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * A KuduPartitioner allows clients to determine the target partition of a + * row without actually performing a write. The set of partitions is eagerly + * fetched when the KuduPartitioner is constructed so that the actual partitioning + * step can be performed synchronously without any network trips. + * + * NOTE: Because this operates on a metadata snapshot retrieved at construction + * time, it will not reflect any metadata changes to the table that have occurred + * since its creation. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduPartitioner { + private static final BytesKey EMPTY = new BytesKey(new byte[0]); + private static final int NON_COVERED_RANGE_INDEX = -1; + + private final PartitionSchema partitionSchema; + private final Map tabletIdToPartition; + private final NavigableMap partitionByStartKey; + private final int numPartitions; + + @InterfaceAudience.Private + @InterfaceStability.Unstable + public KuduPartitioner(PartitionSchema partitionSchema, + Map tabletIdToPartition) { + // TODO(ghenke): Could also build a map of partition index to tablet ID which would + // be useful for identifying which tablet a given row would come from. + NavigableMap partitionByStartKey = new TreeMap<>(); + // Insert a sentinel for the beginning of the table, in case a user + // queries for any row which falls before the first partition. + partitionByStartKey.put(EMPTY, NON_COVERED_RANGE_INDEX); + int index = 0; + for (Map.Entry entry : tabletIdToPartition.entrySet()) { + BytesKey keyStart = new BytesKey(entry.getValue().partitionKeyStart); + BytesKey keyEnd = new BytesKey(entry.getValue().partitionKeyEnd); + partitionByStartKey.put(keyStart, index++); + // Set the start of the next non-covered range to have the NON_COVERED_RANGE_INDEX. + // As we process partitions, if a partition covers this range, the keyStart will be + // equal to this keyEnd and the NON_COVERED_RANGE_INDEX will be replaced with the index + // of that partition. + partitionByStartKey.putIfAbsent(keyEnd, NON_COVERED_RANGE_INDEX); + } + this.partitionSchema = partitionSchema; + this.tabletIdToPartition = tabletIdToPartition; + this.partitionByStartKey = partitionByStartKey; + this.numPartitions = tabletIdToPartition.size(); + } + + /** + * @return the number of partitions known by this partitioner. + */ + public int numPartitions() { + return this.numPartitions; + } + + /** + * Determine if the given row falls into a valid partition. + * + * NOTE: The row must be constructed with a schema returned from the Kudu server. + * ex: `KuduTable.getSchema().newPartialRow();` + * + * @param row The row to check. + * @return true if the row falls into a valid partition. + */ + public boolean isCovered(PartialRow row) { + BytesKey partitionKey = new BytesKey(encodePartitionKey(row)); + // The greatest key that is less than or equal to the given key. + Map.Entry floor = partitionByStartKey.floorEntry(partitionKey); + return floor.getValue() != NON_COVERED_RANGE_INDEX; + } + + /** + * Determine the partition index that the given row falls into. 
+ * + * NOTE: The row must be constructed with a schema returned from the Kudu server. + * ex: `KuduTable.getSchema().newPartialRow();` + * + * @param row The row to be partitioned. + * @return The resulting partition index. + * The result will be less than numPartitions() + * @throws NonCoveredRangeException if the row falls into a non-covered range. + */ + public int partitionRow(PartialRow row) throws NonCoveredRangeException { + BytesKey partitionKey = new BytesKey(encodePartitionKey(row)); + // The greatest key that is less than or equal to the given key. + Map.Entry floor = partitionByStartKey.floorEntry(partitionKey); + if (floor.getValue() == NON_COVERED_RANGE_INDEX) { + Map.Entry ceiling = partitionByStartKey.ceilingEntry(partitionKey); + throw new NonCoveredRangeException(floor.getKey().bytes, ceiling.getKey().bytes); + } + return floor.getValue(); + } + + private byte[] encodePartitionKey(PartialRow row) { + // Column IDs are required to encode the partition key. + Preconditions.checkArgument(row.getSchema().hasColumnIds(), + "The row must be constructed with a schema returned from the server. " + + "(ex: KuduTable.getSchema().newPartialRow();"); + return partitionSchema.encodePartitionKey(row); + } + + /** + * @return the internal map of tablet ID to Partition. + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public Map getTabletMap() { + return tabletIdToPartition; + } + + /** + * A wrapper around a byte array that implements the Comparable interface + * allowing it to be used as the key in map. + */ + private static class BytesKey implements Comparable { + + private final byte[] bytes; + + BytesKey(byte[] bytes) { + this.bytes = bytes; + } + + public boolean isEmpty() { + return bytes.length == 0; + } + + @Override + public int compareTo(BytesKey other) { + return Bytes.memcmp(this.bytes, other.bytes); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof BytesKey)) { + return false; + } + BytesKey bytesKey = (BytesKey) o; + return compareTo(bytesKey) == 0; + } + + @Override + public int hashCode() { + return Arrays.hashCode(bytes); + } + + @Override + public String toString() { + return Bytes.hex(bytes); + } + } + + /** + * A Builder class to build {@link KuduPartitioner}. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class KuduPartitionerBuilder { + + private final KuduTable table; + private long timeoutMillis; + + public KuduPartitionerBuilder(KuduTable table) { + this.table = table; + this.timeoutMillis = table.getAsyncClient().getDefaultAdminOperationTimeoutMs(); + } + + /** + * Set the timeout used for building the {@link KuduPartitioner}. + * Defaults to the {@link AsyncKuduClient#getDefaultAdminOperationTimeoutMs()}. + * @param timeoutMillis the timeout to set in milliseconds. + */ + public KuduPartitionerBuilder buildTimeout(long timeoutMillis) { + this.timeoutMillis = timeoutMillis; + return this; + } + + /** + * Builds a {@link KuduPartitioner} using the passed configurations. + * @return a new {@link KuduPartitioner} + */ + public KuduPartitioner build() throws KuduException { + final TimeoutTracker timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(timeoutMillis); + // Use a LinkedHashMap to maintain partition order. + // This isn't strictly required, but it means that partitions with lower ranges + // will have lower partition index since this map is processed in a for + // loop when constructing the KuduPartitioner. 
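+      // The loop below walks the table's tablets in partition-key order: starting
+      // from the empty partition key, it looks up the tablet covering the current
+      // key (LOWER_BOUND lookup), records it, and continues from that tablet's end
+      // key until the last tablet (empty end key) or a non-covered range is hit.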
+ LinkedHashMap tabletIdToPartition = new LinkedHashMap<>(); + byte[] nextPartKey = EMPTY.bytes; + while (true) { + LocatedTablet tablet; + try { + tablet = KuduClient.joinAndHandleException( + table.getAsyncClient().getTabletLocation(table, + nextPartKey, AsyncKuduClient.LookupType.LOWER_BOUND, + timeoutTracker.getMillisBeforeTimeout())); + } catch (NonCoveredRangeException ncr) { + // No more tablets + break; + } + String tabletId = new String(tablet.getTabletId(), UTF_8); + tabletIdToPartition.put(tabletId, tablet.getPartition()); + byte[] keyEnd = tablet.getPartition().partitionKeyEnd; + if (keyEnd.length == 0) { + break; + } + nextPartKey = keyEnd; + } + return new KuduPartitioner(table.getPartitionSchema(), tabletIdToPartition); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java new file mode 100644 index 0000000000..67c611e058 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java @@ -0,0 +1,1263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import com.google.common.base.Joiner; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.common.primitives.UnsignedBytes; +import com.google.protobuf.ByteString; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.DecimalUtil; +import org.apache.kudu.util.TimestampUtil; + +/** + * A predicate which can be used to filter rows based on the value of a column. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduPredicate { + + /** + * The predicate type. + */ + @InterfaceAudience.Private + enum PredicateType { + /** A predicate which filters all rows. */ + NONE, + /** A predicate which filters all rows not equal to a value. */ + EQUALITY, + /** A predicate which filters all rows not in a range. */ + RANGE, + /** A predicate which filters all null rows. 
*/ + IS_NOT_NULL, + /** A predicate which filters all non-null rows. */ + IS_NULL, + /** A predicate which filters all rows not matching a list of values. */ + IN_LIST, + } + + /** + * The comparison operator of a predicate. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public enum ComparisonOp { + GREATER, + GREATER_EQUAL, + EQUAL, + LESS, + LESS_EQUAL, + } + + private final PredicateType type; + private final ColumnSchema column; + + /** + * The inclusive lower bound value if this is a Range predicate, or + * the createEquality value if this is an Equality predicate. + */ + private final byte[] lower; + + /** The exclusive upper bound value if this is a Range predicate. */ + private final byte[] upper; + + /** IN-list values. */ + private final byte[][] inListValues; + + /** + * Creates a new {@code KuduPredicate} on a boolean column. + * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + boolean value) { + checkColumn(column, Type.BOOL); + // Create the comparison predicate. Range predicates on boolean values can + // always be converted to either an equality, an IS NOT NULL (filtering only + // null values), or NONE (filtering all values). + switch (op) { + case GREATER: { + // b > true -> b NONE + // b > false -> b = true + if (value) { + return none(column); + } else { + return new KuduPredicate(PredicateType.EQUALITY, column, Bytes.fromBoolean(true), null); + } + } + case GREATER_EQUAL: { + // b >= true -> b = true + // b >= false -> b IS NOT NULL + if (value) { + return new KuduPredicate(PredicateType.EQUALITY, column, Bytes.fromBoolean(true), null); + } else { + return newIsNotNullPredicate(column); + } + } + case EQUAL: return new KuduPredicate(PredicateType.EQUALITY, column, + Bytes.fromBoolean(value), null); + case LESS: { + // b < true -> b NONE + // b < false -> b = true + if (value) { + return new KuduPredicate(PredicateType.EQUALITY, column, Bytes.fromBoolean(false), null); + } else { + return none(column); + } + } + case LESS_EQUAL: { + // b <= true -> b IS NOT NULL + // b <= false -> b = false + if (value) { + return newIsNotNullPredicate(column); + } else { + return new KuduPredicate(PredicateType.EQUALITY, column, Bytes.fromBoolean(false), null); + } + } + default: throw new RuntimeException("unknown comparison op"); + } + } + + /** + * Creates a new comparison predicate on an integer or timestamp column. + * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + long value) { + checkColumn(column, Type.INT8, Type.INT16, Type.INT32, Type.INT64, Type.UNIXTIME_MICROS, + Type.DATE); + long minValue = minIntValue(column.getType()); + long maxValue = maxIntValue(column.getType()); + Preconditions.checkArgument(value <= maxValue && value >= minValue, + "integer value out of range for %s column: %s", + column.getType(), value); + + if (op == ComparisonOp.LESS_EQUAL) { + if (value == maxValue) { + // If the value can't be incremented because it is at the top end of the + // range, then substitute the predicate with an IS NOT NULL predicate. + // This has the same effect as an inclusive upper bound on the maximum + // value. If the column is not nullable then the IS NOT NULL predicate + // is ignored. 
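+        // For example, on an INT8 column the predicate (c <= 127) is rewritten
+        // as (c IS NOT NULL), since no INT8 value can exceed 127.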
+ return newIsNotNullPredicate(column); + } + value += 1; + op = ComparisonOp.LESS; + } else if (op == ComparisonOp.GREATER) { + if (value == maxValue) { + return none(column); + } + value += 1; + op = ComparisonOp.GREATER_EQUAL; + } + + byte[] bytes; + switch (column.getType()) { + case INT8: { + bytes = new byte[] { (byte) value }; + break; + } + case INT16: { + bytes = Bytes.fromShort((short) value); + break; + } + case DATE: + case INT32: { + bytes = Bytes.fromInt((int) value); + break; + } + case INT64: + case UNIXTIME_MICROS: { + bytes = Bytes.fromLong(value); + break; + } + default: + throw new RuntimeException("already checked"); + } + switch (op) { + case GREATER_EQUAL: + if (value == minValue) { + return newIsNotNullPredicate(column); + } else if (value == maxValue) { + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + } + return new KuduPredicate(PredicateType.RANGE, column, bytes, null); + case EQUAL: + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + case LESS: + if (value == minValue) { + return none(column); + } + return new KuduPredicate(PredicateType.RANGE, column, null, bytes); + default: + throw new RuntimeException("unknown comparison op"); + } + } + + /** + * Creates a new comparison predicate on a Decimal column. + * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + @SuppressWarnings("BigDecimalEquals") + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + BigDecimal value) { + checkColumn(column, Type.DECIMAL); + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + int precision = typeAttributes.getPrecision(); + int scale = typeAttributes.getScale(); + + // Coerce the value to have the same precision and scale + value = DecimalUtil.coerce(value, precision, scale); + + BigDecimal minValue = DecimalUtil.minValue(precision, scale); + BigDecimal maxValue = DecimalUtil.maxValue(precision, scale); + Preconditions.checkArgument(value.compareTo(maxValue) <= 0 && value.compareTo(minValue) >= 0, + "Decimal value out of range for %s column: %s", + column.getType(), value); + BigDecimal smallestValue = DecimalUtil.smallestValue(scale); + + if (op == ComparisonOp.LESS_EQUAL) { + if (value.equals(maxValue)) { + // If the value can't be incremented because it is at the top end of the + // range, then substitute the predicate with an IS NOT NULL predicate. + // This has the same effect as an inclusive upper bound on the maximum + // value. If the column is not nullable then the IS NOT NULL predicate + // is ignored. 
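+        // For example, on a DECIMAL(4, 2) column the predicate (c <= 99.99) is
+        // rewritten as (c IS NOT NULL), since 99.99 is the largest representable value.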
+ return newIsNotNullPredicate(column); + } + value = value.add(smallestValue); + op = ComparisonOp.LESS; + } else if (op == ComparisonOp.GREATER) { + if (value.equals(maxValue)) { + return none(column); + } + value = value.add(smallestValue); + op = ComparisonOp.GREATER_EQUAL; + } + + byte[] bytes = Bytes.fromBigDecimal(value, precision); + + switch (op) { + case GREATER_EQUAL: + if (value.equals(minValue)) { + return newIsNotNullPredicate(column); + } else if (value.equals(maxValue)) { + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + } + return new KuduPredicate(PredicateType.RANGE, column, bytes, null); + case EQUAL: + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + case LESS: + if (value.equals(minValue)) { + return none(column); + } + return new KuduPredicate(PredicateType.RANGE, column, null, bytes); + default: + throw new RuntimeException("unknown comparison op"); + } + } + + /** + * Creates a new comparison predicate on a timestamp column. + * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + Timestamp value) { + checkColumn(column, Type.UNIXTIME_MICROS); + long micros = TimestampUtil.timestampToMicros(value); + return newComparisonPredicate(column, op, micros); + } + + /** + * Creates a new comparison predicate on a date column. + * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + Date value) { + checkColumn(column, Type.DATE); + int days = DateUtil.sqlDateToEpochDays(value); + return newComparisonPredicate(column, op, days); + } + + /** + * Creates a new comparison predicate on a float column. + * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + float value) { + checkColumn(column, Type.FLOAT); + if (op == ComparisonOp.LESS_EQUAL) { + if (value == Float.POSITIVE_INFINITY) { + return newIsNotNullPredicate(column); + } + value = Math.nextAfter(value, Float.POSITIVE_INFINITY); + op = ComparisonOp.LESS; + } else if (op == ComparisonOp.GREATER) { + if (value == Float.POSITIVE_INFINITY) { + return none(column); + } + value = Math.nextAfter(value, Float.POSITIVE_INFINITY); + op = ComparisonOp.GREATER_EQUAL; + } + + byte[] bytes = Bytes.fromFloat(value); + switch (op) { + case GREATER_EQUAL: + if (value == Float.NEGATIVE_INFINITY) { + return newIsNotNullPredicate(column); + } else if (value == Float.POSITIVE_INFINITY) { + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + } + return new KuduPredicate(PredicateType.RANGE, column, bytes, null); + case EQUAL: + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + case LESS: + if (value == Float.NEGATIVE_INFINITY) { + return none(column); + } + return new KuduPredicate(PredicateType.RANGE, column, null, bytes); + default: + throw new RuntimeException("unknown comparison op"); + } + } + + /** + * Creates a new comparison predicate on a double column. 
+ * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + double value) { + checkColumn(column, Type.DOUBLE); + if (op == ComparisonOp.LESS_EQUAL) { + if (value == Double.POSITIVE_INFINITY) { + return newIsNotNullPredicate(column); + } + value = Math.nextAfter(value, Double.POSITIVE_INFINITY); + op = ComparisonOp.LESS; + } else if (op == ComparisonOp.GREATER) { + if (value == Double.POSITIVE_INFINITY) { + return none(column); + } + value = Math.nextAfter(value, Double.POSITIVE_INFINITY); + op = ComparisonOp.GREATER_EQUAL; + } + + byte[] bytes = Bytes.fromDouble(value); + switch (op) { + case GREATER_EQUAL: + if (value == Double.NEGATIVE_INFINITY) { + return newIsNotNullPredicate(column); + } else if (value == Double.POSITIVE_INFINITY) { + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + } + return new KuduPredicate(PredicateType.RANGE, column, bytes, null); + case EQUAL: + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + case LESS: + if (value == Double.NEGATIVE_INFINITY) { + return none(column); + } + return new KuduPredicate(PredicateType.RANGE, column, null, bytes); + default: + throw new RuntimeException("unknown comparison op"); + } + } + + /** + * Creates a new comparison predicate on a string column. + * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + String value) { + checkColumn(column, Type.STRING, Type.VARCHAR); + + byte[] bytes = Bytes.fromString(value); + if (op == ComparisonOp.LESS_EQUAL) { + bytes = Arrays.copyOf(bytes, bytes.length + 1); + op = ComparisonOp.LESS; + } else if (op == ComparisonOp.GREATER) { + bytes = Arrays.copyOf(bytes, bytes.length + 1); + op = ComparisonOp.GREATER_EQUAL; + } + + switch (op) { + case GREATER_EQUAL: + if (bytes.length == 0) { + return newIsNotNullPredicate(column); + } + return new KuduPredicate(PredicateType.RANGE, column, bytes, null); + case EQUAL: + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + case LESS: + if (bytes.length == 0) { + return none(column); + } + return new KuduPredicate(PredicateType.RANGE, column, null, bytes); + default: + throw new RuntimeException("unknown comparison op"); + } + } + + /** + * Creates a new comparison predicate on a binary column. 
+ * @param column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + byte[] value) { + checkColumn(column, Type.BINARY); + + if (op == ComparisonOp.LESS_EQUAL) { + value = Arrays.copyOf(value, value.length + 1); + op = ComparisonOp.LESS; + } else if (op == ComparisonOp.GREATER) { + value = Arrays.copyOf(value, value.length + 1); + op = ComparisonOp.GREATER_EQUAL; + } + + switch (op) { + case GREATER_EQUAL: + if (value.length == 0) { + return newIsNotNullPredicate(column); + } + return new KuduPredicate(PredicateType.RANGE, column, value, null); + case EQUAL: + return new KuduPredicate(PredicateType.EQUALITY, column, value, null); + case LESS: + if (value.length == 0) { + return none(column); + } + return new KuduPredicate(PredicateType.RANGE, column, null, value); + default: + throw new RuntimeException("unknown comparison op"); + } + } + + /** + * Creates a new comparison predicate on a column. + * + * This method is useful when you don't care about autoboxing + * and your existing type handling logic is based on Java types. + * + * The accepted Object type is based on the column's {@link Type}: + * Type.BOOL -> java.lang.Boolean + * Type.INT8 -> java.lang.Byte + * Type.INT16 -> java.lang.Short + * Type.INT32 -> java.lang.Integer + * Type.INT64 -> java.lang.Long + * Type.UNIXTIME_MICROS -> java.sql.Timestamp or java.lang.Long + * Type.FLOAT -> java.lang.Float + * Type.DOUBLE -> java.lang.Double + * Type.STRING -> java.lang.String + * Type.VARCHAR -> java.lang.String + * Type.BINARY -> byte[] + * Type.DECIMAL -> java.math.BigDecimal + * Type.DATE -> java.sql.Date + * + * @param column column the column schema + * @param op the comparison operation + * @param value the value to compare against + */ + public static KuduPredicate newComparisonPredicate(ColumnSchema column, + ComparisonOp op, + Object value) { + if (value instanceof Boolean) { + return newComparisonPredicate(column, op, (boolean) value); + } else if (value instanceof Byte) { + return newComparisonPredicate(column, op, (byte) value); + } else if (value instanceof Short) { + return newComparisonPredicate(column, op, (short) value); + } else if (value instanceof Integer) { + return newComparisonPredicate(column, op, (int) value); + } else if (value instanceof Long) { + return newComparisonPredicate(column, op, (long) value); + } else if (value instanceof Timestamp) { + return newComparisonPredicate(column, op, (Timestamp) value); + } else if (value instanceof Float) { + return newComparisonPredicate(column, op, (float) value); + } else if (value instanceof Double) { + return newComparisonPredicate(column, op, (double) value); + } else if (value instanceof BigDecimal) { + return newComparisonPredicate(column, op, (BigDecimal) value); + } else if (value instanceof String) { + return newComparisonPredicate(column, op, (String) value); + } else if (value instanceof byte[]) { + return newComparisonPredicate(column, op, (byte[]) value); + } else if (value instanceof Date) { + return newComparisonPredicate(column, op, (Date) value); + } else { + throw new IllegalArgumentException(String.format("illegal type for %s predicate: %s", + op, value.getClass().getName())); + } + } + + /** + * Creates a new IN list predicate. + * + * The list must contain values of the correct type for the column. 
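+ *
+ * A rough usage sketch, not taken from the original source (assumes an INT32
+ * column named "int_col"; asList is java.util.Arrays.asList):
+ *
+ *   ColumnSchema col = schema.getColumn("int_col");
+ *   KuduPredicate pred = KuduPredicate.newInListPredicate(col, Arrays.asList(1, 5, 7));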
+ * + * @param column the column that the predicate applies to + * @param values list of values which the column values must match + * @param the type of values, must match the type of the column + * @return an IN list predicate + */ + public static KuduPredicate newInListPredicate(final ColumnSchema column, List values) { + if (values.isEmpty()) { + return none(column); + } + T t = values.get(0); + + SortedSet vals = new TreeSet<>(new Comparator() { + @Override + public int compare(byte[] a, byte[] b) { + return KuduPredicate.compare(column, a, b); + } + }); + + if (t instanceof Boolean) { + checkColumn(column, Type.BOOL); + for (T value : values) { + vals.add(Bytes.fromBoolean((Boolean) value)); + } + } else if (t instanceof Byte) { + checkColumn(column, Type.INT8); + for (T value : values) { + vals.add(new byte[] {(Byte) value}); + } + } else if (t instanceof Short) { + checkColumn(column, Type.INT16); + for (T value : values) { + vals.add(Bytes.fromShort((Short) value)); + } + } else if (t instanceof Integer) { + checkColumn(column, Type.INT32, Type.DATE); + for (T value : values) { + vals.add(Bytes.fromInt((Integer) value)); + } + } else if (t instanceof Long) { + checkColumn(column, Type.INT64, Type.UNIXTIME_MICROS); + for (T value : values) { + vals.add(Bytes.fromLong((Long) value)); + } + } else if (t instanceof Float) { + checkColumn(column, Type.FLOAT); + for (T value : values) { + vals.add(Bytes.fromFloat((Float) value)); + } + } else if (t instanceof Double) { + checkColumn(column, Type.DOUBLE); + for (T value : values) { + vals.add(Bytes.fromDouble((Double) value)); + } + } else if (t instanceof BigDecimal) { + checkColumn(column, Type.DECIMAL); + for (T value : values) { + vals.add(Bytes.fromBigDecimal((BigDecimal) value, + column.getTypeAttributes().getPrecision())); + } + } else if (t instanceof String) { + checkColumn(column, Type.STRING, Type.VARCHAR); + for (T value : values) { + vals.add(Bytes.fromString((String) value)); + } + } else if (t instanceof byte[]) { + checkColumn(column, Type.BINARY); + for (T value : values) { + vals.add((byte[]) value); + } + } else if (t instanceof Date) { + checkColumn(column, Type.DATE); + for (T value : values) { + vals.add(Bytes.fromInt(DateUtil.sqlDateToEpochDays((Date) value))); + } + } else { + throw new IllegalArgumentException(String.format("illegal type for IN list values: %s", + t.getClass().getName())); + } + + return buildInList(column, vals); + } + + /** + * Creates a new {@code IS NOT NULL} predicate. + * + * @param column the column that the predicate applies to + * @return an {@code IS NOT NULL} predicate + */ + public static KuduPredicate newIsNotNullPredicate(ColumnSchema column) { + return new KuduPredicate(PredicateType.IS_NOT_NULL, column, null, null); + } + + /** + * Creates a new {@code IS NULL} predicate. 
+ * + * @param column the column that the predicate applies to + * @return an {@code IS NULL} predicate + */ + public static KuduPredicate newIsNullPredicate(ColumnSchema column) { + if (!column.isNullable()) { + return none(column); + } + return new KuduPredicate(PredicateType.IS_NULL, column, null, null); + } + + /** + * @param type the predicate type + * @param column the column to which the predicate applies + * @param lower the lower bound serialized value if this is a Range predicate, + * or the equality value if this is an Equality predicate + * @param upper the upper bound serialized value if this is an Equality predicate + */ + @InterfaceAudience.LimitedPrivate("Test") + KuduPredicate(PredicateType type, ColumnSchema column, byte[] lower, byte[] upper) { + this.type = type; + this.column = column; + this.lower = lower; + this.upper = upper; + this.inListValues = null; + } + + /** + * Constructor for IN list predicate. + * @param column the column to which the predicate applies + * @param inListValues the encoded IN list values + */ + private KuduPredicate(ColumnSchema column, byte[][] inListValues) { + this.column = column; + this.type = PredicateType.IN_LIST; + this.lower = null; + this.upper = null; + this.inListValues = inListValues; + } + + /** + * Factory function for a {@code None} predicate. + * @param column the column to which the predicate applies + * @return a None predicate + */ + @InterfaceAudience.LimitedPrivate("Test") + static KuduPredicate none(ColumnSchema column) { + return new KuduPredicate(PredicateType.NONE, column, null, null); + } + + /** + * @return the type of this predicate + */ + PredicateType getType() { + return type; + } + + /** + * Merges another {@code ColumnPredicate} into this one, returning a new + * {@code ColumnPredicate} which matches the logical intersection ({@code AND}) + * of the input predicates. + * @param other the predicate to merge with this predicate + * @return a new predicate that is the logical intersection + */ + KuduPredicate merge(KuduPredicate other) { + Preconditions.checkArgument(column.equals(other.column), + "predicates from different columns may not be merged"); + + // First, consider other.type == NONE, IS_NOT_NULL, or IS_NULL + // NONE predicates dominate. + if (other.type == PredicateType.NONE) { + return other; + } + + // NOT NULL is dominated by all other predicates, + // except IS NULL, for which the merge is NONE. + if (other.type == PredicateType.IS_NOT_NULL) { + return type == PredicateType.IS_NULL ? none(column) : this; + } + + // NULL merged with any predicate type besides itself is NONE. + if (other.type == PredicateType.IS_NULL) { + return type == PredicateType.IS_NULL ? this : none(column); + } + + // Now other.type == EQUALITY, RANGE, or IN_LIST. 
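+    // For example, merging (c >= 5) with (c < 10) yields the range [5, 10),
+    // while merging (c = 3) with (c = 7) yields NONE.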
+ switch (type) { + case NONE: return this; + case IS_NOT_NULL: return other; + case IS_NULL: return none(column); + case EQUALITY: { + if (other.type == PredicateType.EQUALITY) { + if (compare(column, lower, other.lower) != 0) { + return none(this.column); + } else { + return this; + } + } else if (other.type == PredicateType.RANGE) { + if (other.rangeContains(lower)) { + return this; + } else { + return none(this.column); + } + } else { + Preconditions.checkState(other.type == PredicateType.IN_LIST); + return other.merge(this); + } + } + case RANGE: { + if (other.type == PredicateType.EQUALITY || other.type == PredicateType.IN_LIST) { + return other.merge(this); + } else { + Preconditions.checkState(other.type == PredicateType.RANGE); + byte[] newLower = other.lower == null || + (lower != null && compare(column, lower, other.lower) >= 0) ? lower : other.lower; + byte[] newUpper = other.upper == null || + (upper != null && compare(column, upper, other.upper) <= 0) ? upper : other.upper; + if (newLower != null && newUpper != null && compare(column, newLower, newUpper) >= 0) { + return none(column); + } else { + if (newLower != null && newUpper != null && areConsecutive(newLower, newUpper)) { + return new KuduPredicate(PredicateType.EQUALITY, column, newLower, null); + } else { + return new KuduPredicate(PredicateType.RANGE, column, newLower, newUpper); + } + } + } + } + case IN_LIST: { + if (other.type == PredicateType.EQUALITY) { + if (this.inListContains(other.lower)) { + return other; + } else { + return none(column); + } + } else if (other.type == PredicateType.RANGE) { + List values = new ArrayList<>(); + for (byte[] value : inListValues) { + if (other.rangeContains(value)) { + values.add(value); + } + } + return buildInList(column, values); + } else { + Preconditions.checkState(other.type == PredicateType.IN_LIST); + List values = new ArrayList<>(); + for (byte[] value : inListValues) { + if (other.inListContains(value)) { + values.add(value); + } + } + return buildInList(column, values); + } + } + default: throw new IllegalStateException(String.format("unknown predicate type %s", this)); + } + } + + /** + * Builds an IN list predicate from a collection of raw values. The collection + * must be sorted and deduplicated. + * + * @param column the column + * @param values the IN list values + * @return an IN list predicate + */ + private static KuduPredicate buildInList(ColumnSchema column, Collection values) { + // IN (true, false) predicates can be simplified to IS NOT NULL. 
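+    // A BOOL column has only two possible non-null values, so a deduplicated
+    // IN list holding more than one value matches every non-null row.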
+ if (column.getType().getDataType(column.getTypeAttributes()) == + Common.DataType.BOOL && values.size() > 1) { + return newIsNotNullPredicate(column); + } + + switch (values.size()) { + case 0: return KuduPredicate.none(column); + case 1: return new KuduPredicate(PredicateType.EQUALITY, column, + values.iterator().next(), null); + default: return new KuduPredicate(column, values.toArray(new byte[values.size()][])); + } + } + + /** + * @param value the value to check for + * @return {@code true} if this IN list predicate contains the value + */ + boolean inListContains(byte[] value) { + final Comparator comparator = new Comparator() { + @Override + public int compare(byte[] a, byte[] b) { + return KuduPredicate.compare(column, a, b); + } + }; + return Arrays.binarySearch(inListValues, value, comparator) >= 0; + } + + /** + * @param value the value to check + * @return {@code true} if this RANGE predicate contains the value + */ + boolean rangeContains(byte[] value) { + return (lower == null || compare(column, value, lower) >= 0) && + (upper == null || compare(column, value, upper) < 0); + } + + /** + * @return the schema of the predicate column + */ + ColumnSchema getColumn() { + return column; + } + + /** + * Serializes a list of {@code KuduPredicate} into a byte array. + * @return the serialized kudu predicates + * @throws IOException + */ + @InterfaceAudience.LimitedPrivate("kudu-mapreduce") + public static byte[] serialize(List predicates) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + for (KuduPredicate predicate : predicates) { + Common.ColumnPredicatePB message = predicate.toPB(); + message.writeDelimitedTo(baos); + } + return baos.toByteArray(); + } + + /** + * Serializes a list of {@code KuduPredicate} into a byte array. + * @return the serialized kudu predicates + * @throws IOException + */ + @InterfaceAudience.LimitedPrivate("kudu-mapreduce") + public static List deserialize(Schema schema, byte[] bytes) throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + List predicates = new ArrayList<>(); + while (bais.available() > 0) { + Common.ColumnPredicatePB message = Common.ColumnPredicatePB.parseDelimitedFrom(bais); + predicates.add(KuduPredicate.fromPB(schema, message)); + } + return predicates; + } + + /** + * Convert the predicate to the protobuf representation. 
+ * @return the protobuf message for this predicate + */ + @InterfaceAudience.Private + public Common.ColumnPredicatePB toPB() { + Common.ColumnPredicatePB.Builder builder = Common.ColumnPredicatePB.newBuilder(); + builder.setColumn(column.getName()); + + switch (type) { + case EQUALITY: { + builder.getEqualityBuilder().setValue(ByteString.copyFrom(lower)); + break; + } + case RANGE: { + Common.ColumnPredicatePB.Range.Builder b = builder.getRangeBuilder(); + if (lower != null) { + b.setLower(ByteString.copyFrom(lower)); + } + if (upper != null) { + b.setUpper(ByteString.copyFrom(upper)); + } + break; + } + case IS_NOT_NULL: { + builder.setIsNotNull(builder.getIsNotNullBuilder()); + break; + } + case IS_NULL: { + builder.setIsNull(builder.getIsNullBuilder()); + break; + } + case IN_LIST: { + Common.ColumnPredicatePB.InList.Builder inListBuilder = builder.getInListBuilder(); + for (byte[] value : inListValues) { + inListBuilder.addValues(ByteString.copyFrom(value)); + } + break; + } + case NONE: throw new IllegalStateException( + "can not convert None predicate to protobuf message"); + default: throw new IllegalArgumentException( + String.format("unknown predicate type: %s", type)); + } + return builder.build(); + } + + /** + * Convert a column predicate protobuf message into a predicate. + * @return a predicate + */ + @InterfaceAudience.Private + public static KuduPredicate fromPB(Schema schema, Common.ColumnPredicatePB pb) { + final ColumnSchema column = schema.getColumn(pb.getColumn()); + switch (pb.getPredicateCase()) { + case EQUALITY: + return new KuduPredicate(PredicateType.EQUALITY, column, + pb.getEquality().getValue().toByteArray(), null); + case RANGE: { + Common.ColumnPredicatePB.Range range = pb.getRange(); + return new KuduPredicate(PredicateType.RANGE, column, + range.hasLower() ? range.getLower().toByteArray() : null, + range.hasUpper() ? range.getUpper().toByteArray() : null); + } + case IS_NOT_NULL: + return newIsNotNullPredicate(column); + case IS_NULL: + return newIsNullPredicate(column); + case IN_LIST: { + Common.ColumnPredicatePB.InList inList = pb.getInList(); + + SortedSet values = new TreeSet<>(new Comparator() { + @Override + public int compare(byte[] a, byte[] b) { + return KuduPredicate.compare(column, a, b); + } + }); + + for (ByteString value : inList.getValuesList()) { + values.add(value.toByteArray()); + } + return buildInList(column, values); + } + default: + throw new IllegalArgumentException("unknown predicate type"); + } + } + + /** + * Compares two bounds based on the type of the column. 
+ * @param column the column which the values belong to + * @param a the first serialized value + * @param b the second serialized value + * @return the comparison of the serialized values based on the column type + */ + private static int compare(ColumnSchema column, byte[] a, byte[] b) { + switch (column.getType().getDataType(column.getTypeAttributes())) { + case BOOL: + return Boolean.compare(Bytes.getBoolean(a), Bytes.getBoolean(b)); + case INT8: + return Byte.compare(Bytes.getByte(a), Bytes.getByte(b)); + case INT16: + return Short.compare(Bytes.getShort(a), Bytes.getShort(b)); + case INT32: + case DATE: + case DECIMAL32: + return Integer.compare(Bytes.getInt(a), Bytes.getInt(b)); + case INT64: + case UNIXTIME_MICROS: + case DECIMAL64: + return Long.compare(Bytes.getLong(a), Bytes.getLong(b)); + case FLOAT: + return Float.compare(Bytes.getFloat(a), Bytes.getFloat(b)); + case DOUBLE: + return Double.compare(Bytes.getDouble(a), Bytes.getDouble(b)); + case STRING: + case VARCHAR: + case BINARY: + return UnsignedBytes.lexicographicalComparator().compare(a, b); + case DECIMAL128: + return Bytes.getBigInteger(a).compareTo(Bytes.getBigInteger(b)); + default: + throw new IllegalStateException(String.format("unknown column type %s", column.getType())); + } + } + + /** + * Returns true if increment(a) == b. + * @param a the value which would be incremented + * @param b the target value + * @return true if increment(a) == b + */ + private boolean areConsecutive(byte[] a, byte[] b) { + switch (column.getType().getDataType(column.getTypeAttributes())) { + case BOOL: return false; + case INT8: { + byte m = Bytes.getByte(a); + byte n = Bytes.getByte(b); + return m < n && m + 1 == n; + } + case INT16: { + short m = Bytes.getShort(a); + short n = Bytes.getShort(b); + return m < n && m + 1 == n; + } + case INT32: + case DATE: + case DECIMAL32: { + int m = Bytes.getInt(a); + int n = Bytes.getInt(b); + return m < n && m + 1 == n; + } + case INT64: + case UNIXTIME_MICROS: + case DECIMAL64: { + long m = Bytes.getLong(a); + long n = Bytes.getLong(b); + return m < n && m + 1 == n; + } + case FLOAT: { + float m = Bytes.getFloat(a); + float n = Bytes.getFloat(b); + return m < n && Math.nextAfter(m, Float.POSITIVE_INFINITY) == n; + } + case DOUBLE: { + double m = Bytes.getDouble(a); + double n = Bytes.getDouble(b); + return m < n && Math.nextAfter(m, Double.POSITIVE_INFINITY) == n; + } + case STRING: + case VARCHAR: + case BINARY: { + if (a.length + 1 != b.length || b[a.length] != 0) { + return false; + } + for (int i = 0; i < a.length; i++) { + if (a[i] != b[i]) { + return false; + } + } + return true; + } + case DECIMAL128: { + BigInteger m = Bytes.getBigInteger(a); + BigInteger n = Bytes.getBigInteger(b); + return m.compareTo(n) < 0 && m.add(BigInteger.ONE).equals(n); + } + default: + throw new IllegalStateException(String.format("unknown column type %s", column.getType())); + } + } + + /** + * @return the encoded lower bound. + */ + byte[] getLower() { + return lower; + } + + /** + * @return the encoded upper bound. + */ + byte[] getUpper() { + return upper; + } + + /** + * @return the IN list values. Always kept sorted and de-duplicated. + */ + byte[][] getInListValues() { + return inListValues; + } + + /** + * Returns the maximum value for the integer type. 
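+ * For example, {@code maxIntValue(Type.INT8)} is {@link Byte#MAX_VALUE} (127).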
+ * @param type an integer type + * @return the maximum value + */ + @InterfaceAudience.LimitedPrivate("Test") + static long maxIntValue(Type type) { + switch (type) { + case INT8: + return Byte.MAX_VALUE; + case INT16: + return Short.MAX_VALUE; + case INT32: + return Integer.MAX_VALUE; + case UNIXTIME_MICROS: + case INT64: + return Long.MAX_VALUE; + case DATE: + return DateUtil.MAX_DATE_VALUE; + default: + throw new IllegalArgumentException("type must be an integer type"); + } + } + + /** + * Returns the minimum value for the integer type. + * @param type an integer type + * @return the minimum value + */ + @InterfaceAudience.LimitedPrivate("Test") + static long minIntValue(Type type) { + switch (type) { + case INT8: + return Byte.MIN_VALUE; + case INT16: + return Short.MIN_VALUE; + case INT32: + return Integer.MIN_VALUE; + case UNIXTIME_MICROS: + case INT64: + return Long.MIN_VALUE; + case DATE: + return DateUtil.MIN_DATE_VALUE; + default: + throw new IllegalArgumentException("type must be an integer type"); + } + } + + /** + * Checks that the column is one of the expected types. + * @param column the column being checked + * @param passedTypes the expected types (logical OR) + */ + private static void checkColumn(ColumnSchema column, Type... passedTypes) { + for (Type type : passedTypes) { + if (column.getType().equals(type)) { + return; + } + } + throw new IllegalArgumentException(String.format("%s's type isn't %s, it's %s", + column.getName(), Arrays.toString(passedTypes), + column.getType().getName())); + } + + /** + * Returns the string value of serialized value according to the type of column. + * @param value the value + * @return the text representation of the value + */ + private String valueToString(byte[] value) { + switch (column.getType().getDataType(column.getTypeAttributes())) { + case BOOL: return Boolean.toString(Bytes.getBoolean(value)); + case INT8: return Byte.toString(Bytes.getByte(value)); + case INT16: return Short.toString(Bytes.getShort(value)); + case INT32: return Integer.toString(Bytes.getInt(value)); + case INT64: return Long.toString(Bytes.getLong(value)); + case DATE: return DateUtil.epochDaysToDateString(Bytes.getInt(value)); + case UNIXTIME_MICROS: return TimestampUtil.timestampToString(Bytes.getLong(value)); + case FLOAT: return Float.toString(Bytes.getFloat(value)); + case DOUBLE: return Double.toString(Bytes.getDouble(value)); + case VARCHAR: + case STRING: { + String v = Bytes.getString(value); + StringBuilder sb = new StringBuilder(2 + v.length()); + sb.append('"'); + sb.append(v); + sb.append('"'); + return sb.toString(); + } + case BINARY: return Bytes.hex(value); + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + return Bytes.getDecimal(value, typeAttributes.getPrecision(), + typeAttributes.getScale()).toString(); + default: + throw new IllegalStateException(String.format("unknown column type %s", column.getType())); + } + } + + @Override + public String toString() { + switch (type) { + case EQUALITY: return String.format("`%s` = %s", column.getName(), valueToString(lower)); + case RANGE: { + if (lower == null) { + return String.format("`%s` < %s", column.getName(), valueToString(upper)); + } else if (upper == null) { + return String.format("`%s` >= %s", column.getName(), valueToString(lower)); + } else { + return String.format("`%s` >= %s AND `%s` < %s", + column.getName(), valueToString(lower), + column.getName(), valueToString(upper)); + } + } + case IN_LIST: { + List 
strings = new ArrayList<>(inListValues.length); + for (byte[] value : inListValues) { + strings.add(valueToString(value)); + } + return String.format("`%s` IN (%s)", column.getName(), Joiner.on(", ").join(strings)); + } + case IS_NOT_NULL: return String.format("`%s` IS NOT NULL", column.getName()); + case IS_NULL: return String.format("`%s` IS NULL", column.getName()); + case NONE: return String.format("`%s` NONE", column.getName()); + default: throw new IllegalArgumentException(String.format("unknown predicate type %s", type)); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof KuduPredicate)) { + return false; + } + KuduPredicate that = (KuduPredicate) o; + return type == that.type && + column.equals(that.column) && + Arrays.equals(lower, that.lower) && + Arrays.equals(upper, that.upper) && + Arrays.deepEquals(inListValues, that.inListValues); + } + + @Override + public int hashCode() { + return Objects.hashCode(type, column, Arrays.hashCode(lower), + Arrays.hashCode(upper), Arrays.deepHashCode(inListValues)); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduRpc.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduRpc.java new file mode 100644 index 0000000000..a80a4a05ce --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduRpc.java @@ -0,0 +1,469 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.apache.kudu.client; + +import static org.apache.kudu.client.ExternalConsistencyMode.CLIENT_PROPAGATED; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import com.google.common.collect.ImmutableList; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Message; +import com.stumbleupon.async.Deferred; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufOutputStream; +import io.netty.util.Timeout; +import io.netty.util.Timer; +import io.netty.util.TimerTask; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.security.Token; +import org.apache.kudu.util.Pair; +import org.apache.kudu.util.Slice; + +/** + * Abstract base class for all RPC requests going out to Kudu. + *

+ * Implementations of this class are not expected to be synchronized.
+ *
+ * <h1>A note on passing {@code byte} arrays as arguments</h1>
+ * None of the methods that receive a {@code byte[]} argument will copy it.
+ * If you change the contents of any byte array you give to an instance of
+ * this class, you may affect the behavior of the request in an
+ * unpredictable way. If you need to change the byte array,
+ * {@link Object#clone() clone} it before giving it to this class. For those
+ * familiar with the term "defensive copy", we don't do it in order to avoid
+ * unnecessary memory copies when you know you won't be changing (or even
+ * holding a reference to) the byte array, which is frequently the case.
+ *
+ *
+ * <h1>Note regarding {@code KuduRpc} instances passed to {@link AsyncKuduSession}</h1>
+ * Every {@link KuduRpc} passed to a method of AsyncKuduSession should not be + * changed or re-used until the {@code Deferred} returned by that method + * calls you back. Changing or re-using any {@link KuduRpc} for + * an RPC in flight will lead to unpredictable results and voids + * your warranty. + */ +@InterfaceAudience.Private +public abstract class KuduRpc { + + /** + * This along with {@link Status#MAX_MESSAGE_LENGTH} dictates how big all the messages + * in a trace can be. + */ + @InterfaceAudience.LimitedPrivate("Test") + public static final int MAX_TRACES_SIZE = 100; + + /** + * Upper bound on the size of a byte array we de-serialize. + * This is to prevent Kudu from OOM'ing us, should there be a bug or + * undetected corruption of an RPC on the network, which would turn a + * an innocuous RPC into something allocating a ton of memory. + * The Hadoop RPC protocol doesn't do any checksumming as they probably + * assumed that TCP checksums would be sufficient (they're not). + */ + static final int MAX_RPC_SIZE = 256 * 1024 * 1024; // 256MB + + // Service names used by the client. + protected static final String MASTER_SERVICE_NAME = "kudu.master.MasterService"; + protected static final String TABLET_SERVER_SERVICE_NAME = "kudu.tserver.TabletServerService"; + protected static final String TXN_MANAGER_SERVICE_NAME = "kudu.transactions.TxnManagerService"; + + private static final Logger LOG = LoggerFactory.getLogger(KuduRpc.class); + + private final List traces = + Collections.synchronizedList(new ArrayList<>()); + + private KuduRpc parentRpc; + + /** + * Returns the partition key this RPC is for, or {@code null} if the RPC is + * not tablet specific. + *

+ * DO NOT MODIFY THE CONTENTS OF THE RETURNED ARRAY. + */ + byte[] partitionKey() { + return null; + } + + /** + * Binds the given authorization token to the request. + */ + void bindAuthzToken(Token.SignedTokenPB token) { + } + + /** + * Whether the request needs to be authorized via authz token. + */ + boolean needsAuthzToken() { + return false; + } + + /** + * The Deferred that will be invoked when this RPC completes or fails. + * In case of a successful completion, this Deferred's first callback + * will be invoked with an {@link Object} containing the de-serialized + * RPC response in argument. + * Once an RPC has been used, we create a new Deferred for it, in case + * the user wants to re-use it. + */ + private Deferred deferred; + + private RemoteTablet tablet; + + final KuduTable table; + + final TimeoutTracker timeoutTracker; + + // 'timeoutTask' is a handle to the timer task that will time out the RPC. It is + // null if and only if the task has no timeout. + Timeout timeoutTask; + + long propagatedTimestamp = -1; + ExternalConsistencyMode externalConsistencyMode = CLIENT_PROPAGATED; + + /** + * How many times have we retried this RPC?. + * Proper synchronization is required, although in practice most of the code + * that access this attribute will have a happens-before relationship with + * the rest of the code, due to other existing synchronization. + */ + int attempt; // package-private for RpcProxy and AsyncKuduClient only. + + /** + * Set by RpcProxy when isRequestTracked returns true to identify this RPC in the sequence of + * RPCs sent by this client. Once it is set it should never change unless the RPC is reused. + */ + private long sequenceId = RequestTracker.NO_SEQ_NO; + + KuduRpc(KuduTable table, Timer timer, long timeoutMillis) { + this.table = table; + this.timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(timeoutMillis); + if (timer != null) { + this.timeoutTask = AsyncKuduClient.newTimeout(timer, + new RpcTimeoutTask(), + timeoutMillis); + } + } + + /** + * To be implemented by the concrete sub-type. + * + * Notice that this method is package-private, so only classes within this + * package can use this as a base class. + */ + abstract Message createRequestPB(); + + /** + * Package private way of getting the name of the RPC service. + */ + abstract String serviceName(); + + /** + * Package private way of getting the name of the RPC method. + */ + abstract String method(); + + /** + * Returns the set of application-specific feature flags required to service the RPC. + * @return the feature flags required to complete the RPC + */ + Collection getRequiredFeatures() { + return ImmutableList.of(); + } + + /** + * To be implemented by the concrete sub-type. + * This method is expected to de-serialize a response received for the + * current RPC. + * + * Notice that this method is package-private, so only classes within this + * package can use this as a base class. 
+ * + * @param callResponse the call response from which to deserialize + * @param tsUUID a string that contains the UUID of the server that answered the RPC + * @return an Object of type R that will be sent to callback and an Object that will be an Error + * of type TabletServerErrorPB or MasterErrorPB that will be converted into an exception and + * sent to errback + * @throws KuduException an exception that will be sent to errback + */ + abstract Pair deserialize(CallResponse callResponse, String tsUUID) + throws KuduException; + + /** + * Update the statistics information before this rpc is called back. This method should not throw + * any exception, including RuntimeException. This method does nothing by default. + * + * @param statistics object to update + * @param response of this rpc + */ + void updateStatistics(Statistics statistics, R response){ + // default do nothing + } + + /** + * Sets the external consistency mode for this RPC. + * TODO make this cover most if not all RPCs (right now only scans and writes use this). + * @param externalConsistencyMode the mode to set + */ + public void setExternalConsistencyMode(ExternalConsistencyMode externalConsistencyMode) { + this.externalConsistencyMode = externalConsistencyMode; + } + + public ExternalConsistencyMode getExternalConsistencyMode() { + return this.externalConsistencyMode; + } + + /** + * Sets the propagated timestamp for this RPC. + * @param propagatedTimestamp the timestamp to propagate + */ + public void setPropagatedTimestamp(long propagatedTimestamp) { + this.propagatedTimestamp = propagatedTimestamp; + } + + private void handleCallback(final Object result) { + final Deferred d = deferred; + if (d == null) { + LOG.debug("Handling a callback on RPC {} with no deferred attached!", this); + return; + } + deferred = null; + attempt = 0; + // If the subclass is a "tracked RPC" unregister it, unless it never + // got to the point of being registered. + if (isRequestTracked() && sequenceId != RequestTracker.NO_SEQ_NO) { + table.getAsyncClient().getRequestTracker().rpcCompleted(sequenceId); + sequenceId = RequestTracker.NO_SEQ_NO; + } + if (timeoutTask != null) { + timeoutTask.cancel(); + } + timeoutTracker.reset(); + traces.clear(); + parentRpc = null; + d.callback(result); + } + + /** + * Add the provided trace to this RPC's collection of traces. If this RPC has a parent RPC, it + * will also receive that trace. If this RPC has reached the limit of traces it can track then + * the trace will just be discarded. + * @param rpcTraceFrame trace to add + */ + void addTrace(RpcTraceFrame rpcTraceFrame) { + if (parentRpc != null) { + parentRpc.addTrace(rpcTraceFrame); + } + + if (traces.size() == MAX_TRACES_SIZE) { + // Add a last trace that indicates that we've reached the max size. + traces.add( + new RpcTraceFrame.RpcTraceFrameBuilder( + this.method(), + RpcTraceFrame.Action.TRACE_TRUNCATED) + .build()); + } else if (traces.size() < MAX_TRACES_SIZE) { + traces.add(rpcTraceFrame); + } + } + + /** + * Sets this RPC to receive traces from the provided parent RPC. An RPC can only have one and + * only one parent RPC. + * @param parentRpc RPC that will also receive traces from this RPC + */ + void setParentRpc(KuduRpc parentRpc) { + assert (this.parentRpc == null); + assert (this != parentRpc); + this.parentRpc = parentRpc; + } + + /** + * Package private way of making an RPC complete by giving it its result. + * If this RPC has no {@link Deferred} associated to it, nothing will + * happen. 
This may happen if the RPC was already called back.
+ * <p>
+ * Once this call to this method completes, this object can be re-used to + * re-send the same RPC, provided that no other thread still believes this + * RPC to be in-flight (guaranteeing this may be hard in error cases). + */ + final void callback(final R result) { + handleCallback(result); + } + + /** + * Same as callback, except that it accepts an Exception. + */ + final void errback(final Exception e) { + handleCallback(e); + } + + /** Package private way of accessing / creating the Deferred of this RPC. */ + final Deferred getDeferred() { + if (deferred == null) { + deferred = new Deferred<>(); + } + return deferred; + } + + boolean hasDeferred() { + return deferred != null; + } + + RemoteTablet getTablet() { + return this.tablet; + } + + void setTablet(RemoteTablet tablet) { + this.tablet = tablet; + } + + public KuduTable getTable() { + return table; + } + + /** + * If this RPC needs to be tracked on the client and server-side. Some RPCs require exactly-once + * semantics which is enabled by tracking them. + * @return true if the request has to be tracked, else false + */ + boolean isRequestTracked() { + return false; + } + + long getSequenceId() { + return sequenceId; + } + + ReplicaSelection getReplicaSelection() { + return ReplicaSelection.LEADER_ONLY; + } + + /** + * Get an immutable copy of the traces. + * @return list of traces + */ + List getImmutableTraces() { + return ImmutableList.copyOf(traces); + } + + void setSequenceId(long sequenceId) { + assert (this.sequenceId == RequestTracker.NO_SEQ_NO); + this.sequenceId = sequenceId; + } + + @Override + public String toString() { + final StringBuilder buf = new StringBuilder(); + buf.append("KuduRpc(method="); + buf.append(method()); + buf.append(", tablet="); + if (tablet == null) { + buf.append("null"); + } else { + buf.append(tablet.getTabletId()); + } + buf.append(", attempt=").append(attempt); + if (isRequestTracked()) { + buf.append(", sequence_id=").append(sequenceId); + } + buf.append(", ").append(timeoutTracker); + // Cheating a bit, we're not actually logging but we'll augment the information provided by + // this method if DEBUG is enabled. + // + // Lock the traces array and get a copy of it before traverse the traces. This method is + // mostly called when some exceptions are raised, so the influence of performance should be + // acceptable. + List tracesCopy; + synchronized (traces) { + tracesCopy = new ArrayList<>(traces); + } + if (LOG.isDebugEnabled()) { + buf.append(", ").append(RpcTraceFrame.getHumanReadableStringForTraces(tracesCopy)); + buf.append(", deferred=").append(deferred); + } else { + buf.append(", ").append(RpcTraceFrame.getHumanReadableSummaryStringForTraces(tracesCopy)); + } + buf.append(')'); + return buf.toString(); + } + + static void readProtobuf(final Slice slice, + final Message.Builder builder) { + final int length = slice.length(); + final byte[] payload = slice.getRawArray(); + final int offset = slice.getRawOffset(); + try { + builder.mergeFrom(payload, offset, length); + if (!builder.isInitialized()) { + throw new RuntimeException("Could not deserialize the response," + + " incompatible RPC? 
Error is: " + builder.getInitializationErrorString()); + } + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException("Invalid RPC response: length=" + length, e); + } + } + + // TODO(todd): make this private and have all RPCs send RpcOutboundMessage + // instances instead of ByteBuf + static void toByteBuf(ByteBuf out, Message header, Message pb) { + int totalSize = IPCUtil.getTotalSizeWhenWrittenDelimited(header, pb); + out.capacity(totalSize + 4); + out.writeInt(totalSize); + try (ByteBufOutputStream bos = new ByteBufOutputStream(out)) { + CodedOutputStream cos = CodedOutputStream.newInstance(bos, totalSize); + cos.writeUInt32NoTag(header.getSerializedSize()); + header.writeTo(cos); + + cos.writeUInt32NoTag(pb.getSerializedSize()); + pb.writeTo(cos); + cos.flush(); + } catch (IOException e) { + throw new RuntimeException("Cannot serialize the following message " + pb); + } + } + + /** + * A netty TimerTask for timing out a KuduRpc. + */ + final class RpcTimeoutTask implements TimerTask { + @Override + public void run(final Timeout timeout) { + Status statusTimedOut = Status.TimedOut("cannot complete before timeout: " + KuduRpc.this); + KuduRpc.this.errback(new NonRecoverableException(statusTimedOut)); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduRpcResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduRpcResponse.java new file mode 100644 index 0000000000..f57548fa8c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduRpcResponse.java @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Base class for RPC responses. + */ +@InterfaceAudience.Private +abstract class KuduRpcResponse { + private final long elapsedMillis; + private final String tsUUID; + + /** + * Constructor with information common to all RPCs. + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param tsUUID a string that contains the UUID of the server that answered the RPC + */ + KuduRpcResponse(long elapsedMillis, String tsUUID) { + this.elapsedMillis = elapsedMillis; + this.tsUUID = tsUUID; + } + + /** + * Get the number of milliseconds elapsed since the RPC was created up to the moment when this + * response was created. + * @return elapsed time in milliseconds + */ + public long getElapsedMillis() { + return elapsedMillis; + } + + /** + * Get the identifier of the tablet server that sent the response. May be + * {@code null} if the RPC failed before tablet location lookup succeeded. 
+ * @return a string containing a UUID + */ + public String getTsUUID() { + return tsUUID; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java new file mode 100644 index 0000000000..3a5878a151 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java @@ -0,0 +1,740 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.io.IOException; +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.UnsafeByteOperations; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.client.Client.ScanTokenPB; +import org.apache.kudu.security.Token; +import org.apache.kudu.util.NetUtil; +import org.apache.kudu.util.Pair; + +/** + * A scan token describes a partial scan of a Kudu table limited to a single + * contiguous physical location. Using the {@link KuduScanTokenBuilder}, clients can + * describe the desired scan, including predicates, bounds, timestamps, and + * caching, and receive back a collection of scan tokens. + * + * Each scan token may be separately turned into a scanner using + * {@link #intoScanner}, with each scanner responsible for a disjoint section + * of the table. + * + * Scan tokens may be serialized using the {@link #serialize} method and + * deserialized back into a scanner using the {@link #deserializeIntoScanner} + * method. This allows use cases such as generating scan tokens in the planner + * component of a query engine, then sending the tokens to execution nodes based + * on locality, and then instantiating the scanners on those nodes. + * + * Scan token locality information can be inspected using the {@link #getTablet} + * method. 
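+ * <p>
+ * A minimal usage sketch (the table name and the surrounding client setup are
+ * illustrative only):
+ * <pre>{@code
+ * KuduTable table = client.openTable("my_table");
+ * List<byte[]> serialized = new ArrayList<>();
+ * for (KuduScanToken token : client.newScanTokenBuilder(table).build()) {
+ *   serialized.add(token.serialize());
+ * }
+ * // Ship each serialized token to an executor, then on the executor:
+ * KuduScanner scanner = KuduScanToken.deserializeIntoScanner(serialized.get(0), client);
+ * }</pre>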
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class KuduScanToken implements Comparable { + private final LocatedTablet tablet; + private final ScanTokenPB message; + + private KuduScanToken(LocatedTablet tablet, ScanTokenPB message) { + this.tablet = tablet; + this.message = message; + } + + /** + * Returns the tablet which the scanner created from this token will access. + * @return the located tablet + */ + public LocatedTablet getTablet() { + return tablet; + } + + /** + * Creates a {@link KuduScanner} from this scan token. + * @param client a Kudu client for the cluster + * @return a scanner for the scan token + */ + public KuduScanner intoScanner(KuduClient client) throws IOException { + return pbIntoScannerBuilder(message, client).build(); + } + + /** + * Serializes this {@code KuduScanToken} into a byte array. + * @return the serialized scan token + * @throws IOException + */ + public byte[] serialize() throws IOException { + return serialize(message); + } + + /** + * Serializes a {@code KuduScanToken} into a byte array. + * @return the serialized scan token + * @throws IOException + */ + @InterfaceAudience.LimitedPrivate("Test") + static byte[] serialize(ScanTokenPB message) throws IOException { + byte[] buf = new byte[message.getSerializedSize()]; + CodedOutputStream cos = CodedOutputStream.newInstance(buf); + message.writeTo(cos); + cos.flush(); + return buf; + } + + /** + * Deserializes a {@code KuduScanToken} into a {@link KuduScanner}. + * @param buf a byte array containing the serialized scan token. + * @param client a Kudu client for the cluster + * @return a scanner for the serialized scan token + */ + public static KuduScanner deserializeIntoScanner(byte[] buf, KuduClient client) + throws IOException { + return deserializeIntoScannerBuilder(buf, client).build(); + } + + /** + * Deserializes a {@code KuduScanToken} into a {@link KuduScanner.KuduScannerBuilder}. + * @param buf a byte array containing the serialized scan token. + * @param client a Kudu client for the cluster + * @return a scanner builder for the serialized scan token + */ + public static KuduScanner.KuduScannerBuilder deserializeIntoScannerBuilder( + byte[] buf, KuduClient client) throws IOException { + return pbIntoScannerBuilder(ScanTokenPB.parseFrom(CodedInputStream.newInstance(buf)), client); + } + + /** + * Formats the serialized token for debug printing. 
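+ * For example: {@code System.out.println(KuduScanToken.stringifySerializedToken(buf, client));}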
+ * + * @param buf the serialized token + * @param client a Kudu client for the cluster to which the token belongs + * @return a debug string + */ + public static String stringifySerializedToken(byte[] buf, KuduClient client) throws IOException { + ScanTokenPB token = ScanTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + KuduTable table = getKuduTable(token, client); + + MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper("ScanToken") + .add("table-name", table.getName()); + helper.add("table-id", table.getTableId()); + + if (token.hasLowerBoundPrimaryKey() && !token.getLowerBoundPrimaryKey().isEmpty()) { + helper.add("lower-bound-primary-key", + KeyEncoder.decodePrimaryKey(table.getSchema(), + token.getLowerBoundPrimaryKey().toByteArray()) + .stringifyRowKey()); + } + + if (token.hasUpperBoundPrimaryKey() && !token.getUpperBoundPrimaryKey().isEmpty()) { + helper.add("upper-bound-primary-key", + KeyEncoder.decodePrimaryKey(table.getSchema(), + token.getUpperBoundPrimaryKey().toByteArray()) + .stringifyRowKey()); + } + + helper.addValue(KeyEncoder.formatPartitionKeyRange(table.getSchema(), + table.getPartitionSchema(), + token.getLowerBoundPartitionKey() + .toByteArray(), + token.getUpperBoundPartitionKey() + .toByteArray())); + + return helper.toString(); + } + + private static List computeProjectedColumnIndexesForScanner(ScanTokenPB message, + Schema schema) { + if (message.getProjectedColumnIdxCount() != 0) { + return message.getProjectedColumnIdxList(); + } + + List columns = new ArrayList<>(message.getProjectedColumnsCount()); + for (Common.ColumnSchemaPB colSchemaFromPb : message.getProjectedColumnsList()) { + int colIdx = colSchemaFromPb.hasId() && schema.hasColumnIds() ? + schema.getColumnIndex(colSchemaFromPb.getId()) : + schema.getColumnIndex(colSchemaFromPb.getName()); + ColumnSchema colSchema = schema.getColumnByIndex(colIdx); + if (colSchemaFromPb.getType() != + colSchema.getType().getDataType(colSchema.getTypeAttributes())) { + throw new IllegalStateException(String.format( + "invalid type %s for column '%s' in scan token, expected: %s", + colSchemaFromPb.getType().name(), colSchemaFromPb.getName(), + colSchema.getType().name())); + } + if (colSchemaFromPb.getIsNullable() != colSchema.isNullable()) { + throw new IllegalStateException(String.format( + "invalid nullability for column '%s' in scan token, expected: %s", + colSchemaFromPb.getName(), colSchema.isNullable() ? 
"NULLABLE" : "NOT NULL")); + } + columns.add(colIdx); + } + return columns; + } + + /** + * create a new RemoteTablet from TabletMetadata + * @param tabletMetadata the tablet metadata + * @param tableId the table Id + * @param partition the partition + * @return a RemoteTablet object + */ + public static RemoteTablet newRemoteTabletFromTabletMetadata( + Client.TabletMetadataPB tabletMetadata, + String tableId, + Partition partition) { + List replicas = new ArrayList<>(); + for (Client.TabletMetadataPB.ReplicaMetadataPB replicaMetadataPB : + tabletMetadata.getReplicasList()) { + Client.ServerMetadataPB server = + tabletMetadata.getTabletServers(replicaMetadataPB.getTsIdx()); + LocatedTablet.Replica replica = new LocatedTablet.Replica( + server.getRpcAddresses(0).getHost(), + server.getRpcAddresses(0).getPort(), + replicaMetadataPB.getRole(), replicaMetadataPB.getDimensionLabel()); + replicas.add(replica); + } + + List servers = new ArrayList<>(); + for (Client.ServerMetadataPB serverMetadataPB : tabletMetadata.getTabletServersList()) { + HostAndPort hostPort = + ProtobufHelper.hostAndPortFromPB(serverMetadataPB.getRpcAddresses(0)); + final InetAddress inetAddress = NetUtil.getInetAddress(hostPort.getHost()); + ServerInfo serverInfo = new ServerInfo(serverMetadataPB.getUuid().toStringUtf8(), + hostPort, inetAddress, serverMetadataPB.getLocation()); + servers.add(serverInfo); + } + + RemoteTablet remoteTablet = new RemoteTablet(tableId, + tabletMetadata.getTabletId(), partition, replicas, servers); + return remoteTablet; + } + + @SuppressWarnings("deprecation") + private static KuduScanner.KuduScannerBuilder pbIntoScannerBuilder( + ScanTokenPB message, KuduClient client) throws KuduException { + Preconditions.checkArgument( + !message.getFeatureFlagsList().contains(ScanTokenPB.Feature.Unknown), + "Scan token requires an unsupported feature. This Kudu client must be updated."); + + // Use the table metadata from the scan token if it exists, + // otherwise call OpenTable to get the metadata from the master. + KuduTable table = getKuduTable(message, client); + + // Prime the client tablet location cache if no entry is already present. 
+ if (message.hasTabletMetadata()) { + Client.TabletMetadataPB tabletMetadata = message.getTabletMetadata(); + Partition partition = + ProtobufHelper.pbToPartition(tabletMetadata.getPartition()); + if (client.asyncClient.getTableLocationEntry(table.getTableId(), + partition.partitionKeyStart) == null) { + TableLocationsCache tableLocationsCache = + client.asyncClient.getOrCreateTableLocationsCache(table.getTableId()); + + RemoteTablet remoteTablet = + newRemoteTabletFromTabletMetadata(tabletMetadata, table.getTableId(), partition); + + tableLocationsCache.cacheTabletLocations(Collections.singletonList(remoteTablet), + partition.partitionKeyStart, 1, tabletMetadata.getTtlMillis()); + } + } + + if (message.hasAuthzToken()) { + client.asyncClient.getAuthzTokenCache().put(table.getTableId(), message.getAuthzToken()); + } + + KuduScanner.KuduScannerBuilder builder = client.newScannerBuilder(table); + + builder.setProjectedColumnIndexes( + computeProjectedColumnIndexesForScanner(message, table.getSchema())); + + for (Common.ColumnPredicatePB pred : message.getColumnPredicatesList()) { + builder.addPredicate(KuduPredicate.fromPB(table.getSchema(), pred)); + } + + if (message.hasLowerBoundPrimaryKey()) { + builder.lowerBoundRaw(message.getLowerBoundPrimaryKey().toByteArray()); + } + if (message.hasUpperBoundPrimaryKey()) { + builder.exclusiveUpperBoundRaw(message.getUpperBoundPrimaryKey().toByteArray()); + } + + if (message.hasLowerBoundPartitionKey()) { + builder.lowerBoundPartitionKeyRaw(message.getLowerBoundPartitionKey().toByteArray()); + } + if (message.hasUpperBoundPartitionKey()) { + builder.exclusiveUpperBoundPartitionKeyRaw(message.getUpperBoundPartitionKey().toByteArray()); + } + + if (message.hasLimit()) { + builder.limit(message.getLimit()); + } + + if (message.hasReadMode()) { + switch (message.getReadMode()) { + case READ_AT_SNAPSHOT: { + builder.readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT); + if (message.hasSnapTimestamp()) { + builder.snapshotTimestampRaw(message.getSnapTimestamp()); + } + // Set the diff scan timestamps if they are set. 
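+ // A diff scan returns the rows that changed between the start and end snapshot timestamps.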
+ if (message.hasSnapStartTimestamp()) { + builder.diffScan(message.getSnapStartTimestamp(), message.getSnapTimestamp()); + } + break; + } + case READ_LATEST: { + builder.readMode(AsyncKuduScanner.ReadMode.READ_LATEST); + break; + } + case READ_YOUR_WRITES: { + builder.readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES); + break; + } + default: throw new IllegalArgumentException("unknown read mode"); + } + } + + if (message.hasReplicaSelection()) { + switch (message.getReplicaSelection()) { + case LEADER_ONLY: { + builder.replicaSelection(ReplicaSelection.LEADER_ONLY); + break; + } + case CLOSEST_REPLICA: { + builder.replicaSelection(ReplicaSelection.CLOSEST_REPLICA); + break; + } + default: throw new IllegalArgumentException("unknown replica selection policy"); + } + } + + if (message.hasPropagatedTimestamp() && + message.getPropagatedTimestamp() != AsyncKuduClient.NO_TIMESTAMP) { + client.updateLastPropagatedTimestamp(message.getPropagatedTimestamp()); + } + + if (message.hasCacheBlocks()) { + builder.cacheBlocks(message.getCacheBlocks()); + } + + if (message.hasFaultTolerant()) { + builder.setFaultTolerant(message.getFaultTolerant()); + } + + if (message.hasBatchSizeBytes()) { + builder.batchSizeBytes(message.getBatchSizeBytes()); + } + + if (message.hasScanRequestTimeoutMs()) { + builder.scanRequestTimeout(message.getScanRequestTimeoutMs()); + } + + if (message.hasKeepAlivePeriodMs()) { + builder.keepAlivePeriodMs(message.getKeepAlivePeriodMs()); + } + if (message.hasQueryId()) { + builder.setQueryId(message.getQueryId()); + } + + return builder; + } + + private static KuduTable getKuduTable(ScanTokenPB message, + KuduClient client) throws KuduException { + // Use the table metadata from the scan token if it exists, + // otherwise call OpenTable to get the metadata from the master. + if (message.hasTableMetadata()) { + Client.TableMetadataPB tableMetadata = message.getTableMetadata(); + Schema schema = ProtobufHelper.pbToSchema(tableMetadata.getSchema()); + PartitionSchema partitionSchema = + ProtobufHelper.pbToPartitionSchema(tableMetadata.getPartitionSchema(), schema); + return new KuduTable(client.asyncClient, tableMetadata.getTableName(), + tableMetadata.getTableId(), schema, partitionSchema, + tableMetadata.getNumReplicas(), tableMetadata.getExtraConfigsMap(), + tableMetadata.getOwner(), tableMetadata.getComment()); + } else if (message.hasTableId()) { + return client.openTableById(message.getTableId()); + } else { + return client.openTable(message.getTableName()); + } + } + + @Override + public int compareTo(KuduScanToken other) { + if (message.hasTableId() && other.message.hasTableId()) { + if (!message.getTableId().equals(other.message.getTableId())) { + throw new IllegalArgumentException("Scan tokens from different tables may not be compared"); + } + } else if (!message.getTableName().equals(other.message.getTableName())) { + throw new IllegalArgumentException("Scan tokens from different tables may not be compared"); + } + + return tablet.getPartition().compareTo(other.getTablet().getPartition()); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof KuduScanToken)) { + return false; + } + KuduScanToken that = (KuduScanToken) o; + return compareTo(that) == 0; + } + + @Override + public int hashCode() { + return Objects.hashCode(tablet, message); + } + + /** + * Builds a sequence of scan tokens. 
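+ *
+ * A sketch of typical use, assuming the builder is obtained from
+ * {@code KuduClient#newScanTokenBuilder} (column names are illustrative):
+ * <pre>{@code
+ * List<KuduScanToken> tokens = client.newScanTokenBuilder(table)
+ *     .setProjectedColumnNames(Arrays.asList("key", "value"))
+ *     .setTimeout(30000)
+ *     .build();
+ * }</pre>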
+ */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static class KuduScanTokenBuilder + extends AbstractKuduScannerBuilder> { + + private static final int DEFAULT_SPLIT_SIZE_BYTES = -1; + + private long timeout; + + // By default, a scan token is created for each tablet to be scanned. + private long splitSizeBytes = DEFAULT_SPLIT_SIZE_BYTES; + + private boolean includeTableMetadata = true; + private boolean includeTabletMetadata = true; + + KuduScanTokenBuilder(AsyncKuduClient client, KuduTable table) { + super(client, table); + timeout = client.getDefaultOperationTimeoutMs(); + } + + /** + * Sets a timeout value to use when building the list of scan tokens. If + * unset, the client operation timeout will be used. + * @param timeoutMs the timeout in milliseconds. + */ + public KuduScanTokenBuilder setTimeout(long timeoutMs) { + timeout = timeoutMs; + return this; + } + + /** + * Sets the data size of key range. It is used to split tablet's primary key range + * into smaller ranges. The split doesn't change the layout of the tablet. This is a hint: + * The tablet server may return the size of key range larger or smaller than this value. + * If unset or <= 0, the key range includes all the data of the tablet. + * @param splitSizeBytes the data size of key range. + */ + public KuduScanTokenBuilder setSplitSizeBytes(long splitSizeBytes) { + this.splitSizeBytes = splitSizeBytes; + return this; + } + + /** + * If the table metadata is included on the scan token a GetTableSchema + * RPC call to the master can be avoided when deserializing each scan token + * into a scanner. + * @param includeMetadata true, if table metadata should be included. + */ + public KuduScanTokenBuilder includeTableMetadata(boolean includeMetadata) { + this.includeTableMetadata = includeMetadata; + return this; + } + + /** + * If the tablet metadata is included on the scan token a GetTableLocations + * RPC call to the master can be avoided when scanning with a scanner constructed + * from a scan token. + * @param includeMetadata true, if tablet metadata should be included. + */ + public KuduScanTokenBuilder includeTabletMetadata(boolean includeMetadata) { + this.includeTabletMetadata = includeMetadata; + return this; + } + + @Override + public List build() { + if (lowerBoundPartitionKey.length != 0 || + upperBoundPartitionKey.length != 0) { + throw new IllegalArgumentException( + "Partition key bounds may not be set on KuduScanTokenBuilder"); + } + + // If the scan is short-circuitable, then return no tokens. + for (KuduPredicate predicate : predicates.values()) { + if (predicate.getType() == KuduPredicate.PredicateType.NONE) { + return new ArrayList<>(); + } + } + + Client.ScanTokenPB.Builder proto = Client.ScanTokenPB.newBuilder(); + + if (includeTableMetadata) { + // Set the table metadata so that a call to the master is not needed when + // deserializing the token into a scanner. + Client.TableMetadataPB tableMetadataPB = Client.TableMetadataPB.newBuilder() + .setTableId(table.getTableId()) + .setTableName(table.getName()) + .setOwner(table.getOwner()) + .setComment(table.getComment()) + .setNumReplicas(table.getNumReplicas()) + .setSchema(ProtobufHelper.schemaToPb(table.getSchema())) + .setPartitionSchema(ProtobufHelper.partitionSchemaToPb(table.getPartitionSchema())) + .putAllExtraConfigs(table.getExtraConfig()) + .build(); + proto.setTableMetadata(tableMetadataPB); + + // Only include the authz token if the table metadata is included. 
+ // It is returned in the required GetTableSchema request otherwise. + Token.SignedTokenPB authzToken = client.getAuthzToken(table.getTableId()); + if (authzToken != null) { + proto.setAuthzToken(authzToken); + } + } else { + // If we add the table metadata, we don't need to set the old table id + // and table name. It is expected that the creation and use of a scan token + // will be on the same or compatible versions. + proto.setTableId(table.getTableId()); + proto.setTableName(table.getName()); + } + + // Map the column names or indices to actual columns in the table schema. + // If the user did not set either projection, then scan all columns. + Schema schema = table.getSchema(); + if (includeTableMetadata) { + // If the table metadata is included, then the column indexes can be + // used instead of duplicating the ColumnSchemaPBs in the serialized + // scan token. + if (projectedColumnNames != null) { + for (String columnName : projectedColumnNames) { + proto.addProjectedColumnIdx(schema.getColumnIndex(columnName)); + } + } else if (projectedColumnIndexes != null) { + proto.addAllProjectedColumnIdx(projectedColumnIndexes); + } else { + List indexes = IntStream.range(0, schema.getColumnCount()) + .boxed().collect(Collectors.toList()); + proto.addAllProjectedColumnIdx(indexes); + } + } else { + if (projectedColumnNames != null) { + for (String columnName : projectedColumnNames) { + ColumnSchema columnSchema = schema.getColumn(columnName); + Preconditions.checkArgument(columnSchema != null, + "unknown column i%s", columnName); + ProtobufHelper.columnToPb(proto.addProjectedColumnsBuilder(), + schema.hasColumnIds() ? schema.getColumnId(columnName) : -1, + columnSchema); + } + } else if (projectedColumnIndexes != null) { + for (int columnIdx : projectedColumnIndexes) { + ColumnSchema columnSchema = schema.getColumnByIndex(columnIdx); + Preconditions.checkArgument(columnSchema != null, + "unknown column index %s", columnIdx); + ProtobufHelper.columnToPb(proto.addProjectedColumnsBuilder(), + schema.hasColumnIds() ? + schema.getColumnId(columnSchema.getName()) : + -1, + columnSchema); + } + } else { + for (ColumnSchema column : schema.getColumns()) { + ProtobufHelper.columnToPb(proto.addProjectedColumnsBuilder(), + schema.hasColumnIds() ? + schema.getColumnId(column.getName()) : + -1, + column); + } + } + } + + for (KuduPredicate predicate : predicates.values()) { + proto.addColumnPredicates(predicate.toPB()); + } + + if (lowerBoundPrimaryKey.length > 0) { + proto.setLowerBoundPrimaryKey(UnsafeByteOperations.unsafeWrap(lowerBoundPrimaryKey)); + } + if (upperBoundPrimaryKey.length > 0) { + proto.setUpperBoundPrimaryKey(UnsafeByteOperations.unsafeWrap(upperBoundPrimaryKey)); + } + + proto.setLimit(limit); + proto.setReadMode(readMode.pbVersion()); + + if (replicaSelection == ReplicaSelection.LEADER_ONLY) { + proto.setReplicaSelection(Common.ReplicaSelection.LEADER_ONLY); + } else if (replicaSelection == ReplicaSelection.CLOSEST_REPLICA) { + proto.setReplicaSelection(Common.ReplicaSelection.CLOSEST_REPLICA); + } + + // If the last propagated timestamp is set send it with the scan. + if (table.getAsyncClient().getLastPropagatedTimestamp() != AsyncKuduClient.NO_TIMESTAMP) { + proto.setPropagatedTimestamp(client.getLastPropagatedTimestamp()); + } + + // If the mode is set to read on snapshot set the snapshot timestamps. 
+ if (readMode == AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT) { + if (htTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + proto.setSnapTimestamp(htTimestamp); + } + if (startTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + proto.setSnapStartTimestamp(startTimestamp); + } + } + + proto.setCacheBlocks(cacheBlocks); + proto.setFaultTolerant(isFaultTolerant); + proto.setBatchSizeBytes(batchSizeBytes); + proto.setScanRequestTimeoutMs(scanRequestTimeout); + proto.setKeepAlivePeriodMs(keepAlivePeriodMs); + + try { + PartitionPruner pruner = PartitionPruner.create(this); + List keyRanges = new ArrayList<>(); + while (pruner.hasMorePartitionKeyRanges()) { + Pair partitionRange = pruner.nextPartitionKeyRange(); + List newKeyRanges = client.getTableKeyRanges( + table, + proto.getLowerBoundPrimaryKey().toByteArray(), + proto.getUpperBoundPrimaryKey().toByteArray(), + partitionRange.getFirst().length == 0 ? null : partitionRange.getFirst(), + partitionRange.getSecond().length == 0 ? null : partitionRange.getSecond(), + AsyncKuduClient.FETCH_TABLETS_PER_RANGE_LOOKUP, + splitSizeBytes, + timeout).join(); + + if (newKeyRanges.isEmpty()) { + pruner.removePartitionKeyRange(partitionRange.getSecond()); + } else { + pruner.removePartitionKeyRange(newKeyRanges.get(newKeyRanges.size() - 1) + .getPartitionKeyEnd()); + } + keyRanges.addAll(newKeyRanges); + } + + List tokens = new ArrayList<>(keyRanges.size()); + for (KeyRange keyRange : keyRanges) { + Client.ScanTokenPB.Builder builder = proto.clone(); + builder.setLowerBoundPartitionKey( + UnsafeByteOperations.unsafeWrap(keyRange.getPartitionKeyStart())); + builder.setUpperBoundPartitionKey( + UnsafeByteOperations.unsafeWrap(keyRange.getPartitionKeyEnd())); + byte[] primaryKeyStart = keyRange.getPrimaryKeyStart(); + if (primaryKeyStart != null && primaryKeyStart.length > 0) { + builder.setLowerBoundPrimaryKey(UnsafeByteOperations.unsafeWrap(primaryKeyStart)); + } + byte[] primaryKeyEnd = keyRange.getPrimaryKeyEnd(); + if (primaryKeyEnd != null && primaryKeyEnd.length > 0) { + builder.setUpperBoundPrimaryKey(UnsafeByteOperations.unsafeWrap(primaryKeyEnd)); + } + + LocatedTablet tablet = keyRange.getTablet(); + + // Set the tablet metadata so that a call to the master is not needed to + // locate the tablet to scan when opening the scanner. + if (includeTabletMetadata) { + TableLocationsCache.Entry entry = client.getTableLocationEntry(table.getTableId(), + tablet.getPartition().partitionKeyStart); + if (entry != null && !entry.isNonCoveredRange() && !entry.isStale()) { + RemoteTablet remoteTablet = entry.getTablet(); + + // Build the list of server metadata. + List servers = new ArrayList<>(); + Map serverIndexMap = new HashMap<>(); + List tabletServers = remoteTablet.getTabletServersCopy(); + for (int i = 0; i < tabletServers.size(); i++) { + ServerInfo serverInfo = tabletServers.get(i); + Client.ServerMetadataPB serverMetadataPB = + Client.ServerMetadataPB.newBuilder() + .setUuid(ByteString.copyFromUtf8(serverInfo.getUuid())) + .addRpcAddresses( + ProtobufHelper.hostAndPortToPB(serverInfo.getHostAndPort())) + .setLocation(serverInfo.getLocation()) + .build(); + servers.add(serverMetadataPB); + serverIndexMap.put(serverInfo.getHostAndPort(), i); + } + + // Build the list of replica metadata. 
+ List replicas = new ArrayList<>(); + for (LocatedTablet.Replica replica : remoteTablet.getReplicas()) { + Integer serverIndex = serverIndexMap.get( + new HostAndPort(replica.getRpcHost(), replica.getRpcPort())); + // If the server index is not found it means that RemoteTablet.removeTabletClient + // was called and removed the server likely as a result of a tablet not found error. + // In that case we should remove the replica as it can't be contacted. + if (serverIndex == null) { + continue; + } + + Client.TabletMetadataPB.ReplicaMetadataPB.Builder tabletMetadataBuilder = + Client.TabletMetadataPB.ReplicaMetadataPB.newBuilder() + .setRole(replica.getRoleAsEnum()) + .setTsIdx(serverIndex); + if (replica.getDimensionLabel() != null) { + tabletMetadataBuilder.setDimensionLabel(replica.getDimensionLabel()); + } + replicas.add(tabletMetadataBuilder.build()); + } + + // Build the tablet metadata and add it to the token. + Client.TabletMetadataPB tabletMetadataPB = Client.TabletMetadataPB.newBuilder() + .setTabletId(remoteTablet.getTabletId()) + .setPartition(ProtobufHelper.partitionToPb(remoteTablet.getPartition())) + .addAllReplicas(replicas) + .addAllTabletServers(servers) + .setTtlMillis(entry.ttl()) + .build(); + builder.setTabletMetadata(tabletMetadataPB); + } + } + builder.setQueryId(queryId); + + tokens.add(new KuduScanToken(keyRange.getTablet(), builder.build())); + } + return tokens; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScanner.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScanner.java new file mode 100644 index 0000000000..79ba197432 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScanner.java @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Schema; +import org.apache.kudu.client.AsyncKuduScanner.ReadMode; + +/** + * Synchronous version of {@link AsyncKuduScanner}. Offers the same API but with blocking methods. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduScanner implements Iterable { + + private final AsyncKuduScanner asyncScanner; + + KuduScanner(AsyncKuduScanner asyncScanner) { + this.asyncScanner = asyncScanner; + } + + /** + * Tells if the last rpc returned that there might be more rows to scan. 
+ * @return true if there might be more data to scan, else false + */ + public boolean hasMoreRows() { + return asyncScanner.hasMoreRows(); + } + + /** + * If set to true, the {@link RowResult} object returned by the {@link RowResultIterator} + * will be reused with each call to {@link RowResultIterator#next()}. + * This can be a useful optimization to reduce the number of objects created. + * + * Note: DO NOT use this if the RowResult is stored between calls to next(). + * Enabling this optimization means that a call to next() mutates the previously returned + * RowResult. Accessing the previously returned RowResult after a call to next(), by storing all + * RowResults in a collection and accessing them later for example, will lead to all of the + * stored RowResults being mutated as per the data in the last RowResult returned. + */ + public void setReuseRowResult(boolean reuseRowResult) { + asyncScanner.setReuseRowResult(reuseRowResult); + } + + /** + * Optionally set expected row data format. + * + * @param rowDataFormat Row data format to be expected. + */ + public void setRowDataFormat(AsyncKuduScanner.RowDataFormat rowDataFormat) { + asyncScanner.setRowDataFormat(rowDataFormat); + } + + /** + * Scans a number of rows. + *
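+ * For illustration, a minimal scan loop; {@code client} and {@code table} are placeholders
+ * for an already opened {@link KuduClient} and {@link KuduTable}:
+ * <pre>{@code
+ * KuduScanner scanner = client.newScannerBuilder(table).build();
+ * try {
+ *   while (scanner.hasMoreRows()) {
+ *     RowResultIterator results = scanner.nextRows();
+ *     while (results.hasNext()) {
+ *       RowResult row = results.next();
+ *       System.out.println(row.toString());
+ *     }
+ *   }
+ * } finally {
+ *   scanner.close();
+ * }
+ * }</pre>
+ *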
+ * Once this method returns {@code null} once (which indicates that this + * {@code Scanner} is done scanning), calling it again leads to an undefined + * behavior. + * @return a list of rows. + * @throws KuduException if anything went wrong. + */ + public RowResultIterator nextRows() throws KuduException { + return KuduClient.joinAndHandleException(asyncScanner.nextRows()); + } + + /** + * Keep the current remote scanner alive. + *
+ * Keep the current remote scanner alive on the Tablet server for an + * additional time-to-live. This is useful if the interval in between + * nextRows() calls is big enough that the remote scanner might be garbage + * collected. The scanner time-to-live can be configured on the tablet + * server via the --scanner_ttl_ms configuration flag and has a default + * of 60 seconds. + *
+ * This does not invalidate any previously fetched results. + *
+ * Note that an exception thrown by this method should not be taken as indication + * that the scan has failed. Subsequent calls to nextRows() might still be successful, + * particularly if the scanner is configured to be fault tolerant. + * @throws KuduException if anything went wrong. + */ + public final void keepAlive() throws KuduException { + KuduClient.joinAndHandleException(asyncScanner.keepAlive()); + } + + /** + * Keep the current remote scanner alive by sending keep-alive requests periodically. + *
+ * startKeepAlivePeriodically() uses a timer to call keepAlive() periodically, at the
+ * interval given by the keepAliveIntervalMS parameter. It sends keep-alive requests to
+ * the server on a separate thread. This is useful if the client takes a long time to
+ * process the fetched data before having a chance to call keepAlive(). This method can
+ * be called after the scanner is opened, and the timer can be stopped by calling
+ * stopKeepAlivePeriodically().
+ *
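+ * For illustration, a minimal sketch of the intended call pattern; {@code scanner} is a
+ * placeholder for an already opened {@link KuduScanner} and the 30-second interval is an
+ * arbitrary example value:
+ * <pre>{@code
+ * scanner.startKeepAlivePeriodically(30000);
+ * try {
+ *   while (scanner.hasMoreRows()) {
+ *     RowResultIterator results = scanner.nextRows();
+ *     // Potentially slow, client-side processing of 'results' happens here.
+ *   }
+ * } finally {
+ *   scanner.stopKeepAlivePeriodically();
+ *   scanner.close();
+ * }
+ * }</pre>
+ *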
+ * @throws KuduException if anything went wrong. + *
+ * @return true if the keep-alive timer was started successfully
+ */
+ public final boolean startKeepAlivePeriodically(int keepAliveIntervalMS) throws KuduException {
+   return asyncScanner.startKeepAlivePeriodically(keepAliveIntervalMS);
+ }
+
+ /**
+ * Stop keeping the current remote scanner alive periodically.
+ *
+ * This method stops sending periodic keep-alive requests to the server.
+ * After startKeepAlivePeriodically() has been called, this method can be used to
+ * stop the keep-alive timer at any time. The timer is stopped automatically once
+ * scanning finishes, but it can also be stopped manually by calling this method.
+ *
+ * @return true if the keep-alive timer was stopped successfully
+ */
+ public final boolean stopKeepAlivePeriodically() {
+   return asyncScanner.stopKeepAlivePeriodically();
+ }
+
+ /**
+ * @return true if the scanner has been closed.
+ */
+ public boolean isClosed() {
+   return asyncScanner.isClosed();
+ }
+
+ /**
+ * Closes this scanner (don't forget to call this when you're done with it!).
+ *
+ * Closing a scanner already closed has no effect. + * @return a deferred object that indicates the completion of the request + * @throws KuduException if anything went wrong. + */ + public RowResultIterator close() throws KuduException { + return KuduClient.joinAndHandleException(asyncScanner.close()); + } + + /** + * Returns the maximum number of rows that this scanner was configured to return. + * @return a long representing the maximum number of rows that can be returned + */ + public long getLimit() { + return asyncScanner.getLimit(); + } + + /** + * Returns if this scanner was configured to cache data blocks or not. + * @return true if this scanner will cache blocks, else else. + */ + public boolean getCacheBlocks() { + return asyncScanner.getCacheBlocks(); + } + + /** + * Returns the maximum number of bytes returned by the scanner, on each batch. + * @return a long representing the maximum number of bytes that a scanner can receive at once + * from a tablet server + */ + public long getBatchSizeBytes() { + return asyncScanner.getBatchSizeBytes(); + } + + /** + * Returns the ReadMode for this scanner. + * @return the configured read mode for this scanner + */ + public ReadMode getReadMode() { + return asyncScanner.getReadMode(); + } + + /** + * Returns the projection schema of this scanner. If specific columns were + * not specified during scanner creation, the table schema is returned. + * @return the projection schema for this scanner + */ + public Schema getProjectionSchema() { + return asyncScanner.getProjectionSchema(); + } + + /** + * Returns the resource metrics of this scanner. + * @return the resource metrics for this scanner + */ + public ResourceMetrics getResourceMetrics() { + return asyncScanner.getResourceMetrics(); + } + + /** + * Returns the RemoteTablet currently being scanned, if any. + */ + @InterfaceAudience.LimitedPrivate("Test") + public RemoteTablet currentTablet() { + return asyncScanner.currentTablet(); + } + + /** + * Gets the replica selection mechanism being used. + * + * @return the replica selection mechanism + */ + @InterfaceAudience.LimitedPrivate("Test") + ReplicaSelection getReplicaSelection() { + return asyncScanner.getReplicaSelection(); + } + + /** + * Returns the current value of the scanner's scan request timeout. + * @return the timeout value, in milliseconds + */ + public long getScanRequestTimeout() { + return asyncScanner.getScanRequestTimeout(); + } + + @Override + public KuduScannerIterator iterator() { + return new KuduScannerIterator(this, asyncScanner.getKeepAlivePeriodMs()); + } + + /** + * A Builder class to build {@link KuduScanner}. + * Use {@link KuduClient#newScannerBuilder} in order to get a builder instance. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class KuduScannerBuilder + extends AbstractKuduScannerBuilder { + + KuduScannerBuilder(AsyncKuduClient client, KuduTable table) { + super(client, table); + } + + /** + * Builds a {@link KuduScanner} using the passed configurations. 
+ * @return a new {@link KuduScanner} + */ + @Override + public KuduScanner build() { + return new KuduScanner(new AsyncKuduScanner( + client, table, projectedColumnNames, projectedColumnIndexes, readMode, isFaultTolerant, + scanRequestTimeout, predicates, limit, cacheBlocks, prefetching, lowerBoundPrimaryKey, + upperBoundPrimaryKey, startTimestamp, htTimestamp, batchSizeBytes, + PartitionPruner.create(this), replicaSelection, keepAlivePeriodMs, queryId)); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScannerIterator.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScannerIterator.java new file mode 100644 index 0000000000..5318cd8d9a --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduScannerIterator.java @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.Iterator; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * An iterator for the RowResults of a KuduScanner. + * Exhausting this iterator means that all of the rows from a KuduScanner have been read. + * + * This iterator also handles sending keep alive requests to ensure the scanner + * does not time out. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduScannerIterator implements Iterator { + + private final KuduScanner scanner; + private final long keepAlivePeriodMs; + + private RowResultIterator currentIterator = RowResultIterator.empty(); + private long lastKeepAliveTimeMs = System.currentTimeMillis(); + + KuduScannerIterator(KuduScanner scanner, long keepAlivePeriodMs) { + this.scanner = scanner; + this.keepAlivePeriodMs = keepAlivePeriodMs; + } + + /** + * Calls the keepAlive API on the current scanner if the keepAlivePeriodMs has passed. + */ + private void keepKuduScannerAlive() throws KuduException { + long now = System.currentTimeMillis(); + if (now >= lastKeepAliveTimeMs + keepAlivePeriodMs && !scanner.isClosed()) { + scanner.keepAlive(); + lastKeepAliveTimeMs = now; + } + } + + /** + * Special implementation of hasNext that calls a callback each time + * {@link KuduScanner#nextRows} is called. 
+ * + * @param nextRowsCallback the NextRowsCallback to call + * @return {@code true} if the iteration has more elements + */ + @InterfaceAudience.LimitedPrivate("Spark") + public boolean hasNext(NextRowsCallback nextRowsCallback) { + try { + while (!currentIterator.hasNext() && scanner.hasMoreRows()) { + currentIterator = scanner.nextRows(); + if (nextRowsCallback != null) { + nextRowsCallback.call(currentIterator.getNumRows()); + } + } + keepKuduScannerAlive(); + return currentIterator.hasNext(); + } catch (KuduException ex) { + throw new RuntimeException(ex); + } + } + + @Override + public boolean hasNext() { + return hasNext(null); + } + + @Override + public RowResult next() { + return currentIterator.next(); + } + + @InterfaceAudience.LimitedPrivate("Spark") + public abstract static class NextRowsCallback { + + /** + * @param numRows The number of rows returned from the + * {@link KuduScanner#nextRows} call. + */ + public abstract void call(int numRows); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduSession.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduSession.java new file mode 100644 index 0000000000..fcebc137f3 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduSession.java @@ -0,0 +1,211 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.List; + +import com.stumbleupon.async.Deferred; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Synchronous version of {@link AsyncKuduSession}. + * Offers the same API but with blocking methods.
+ *
+ * This class is not thread-safe.
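+ *
+ * For illustration, a minimal write sketch; {@code client} and {@code table} are placeholders
+ * for an open {@link KuduClient} and {@link KuduTable}, and the column names are assumed:
+ * <pre>{@code
+ * KuduSession session = client.newSession();
+ * try {
+ *   Insert insert = table.newInsert();
+ *   insert.getRow().addInt("key", 1);        // "key" is an assumed column name
+ *   insert.getRow().addString("value", "a"); // "value" is an assumed column name
+ *   session.apply(insert);
+ *   session.flush();
+ * } finally {
+ *   session.close();
+ * }
+ * }</pre>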
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduSession implements SessionConfiguration { + + private static final Logger LOG = LoggerFactory.getLogger(KuduSession.class); + private final AsyncKuduSession session; + + KuduSession(AsyncKuduSession session) { + this.session = session; + } + + /** + * Apply a given {@link Operation} to Kudu as part of this session. + * + *
+ * This is a blocking call that has different behavior based on the configured flush mode:
+ *
+ * <ul>
+ * <li>{@link SessionConfiguration.FlushMode#AUTO_FLUSH_SYNC AUTO_FLUSH_SYNC}:
+ * the call returns when the operation is persisted, else it throws an exception.
+ *
+ * <li>{@link SessionConfiguration.FlushMode#AUTO_FLUSH_BACKGROUND AUTO_FLUSH_BACKGROUND}:
+ * the call returns when the operation has been added to the buffer.
+ * This call should normally perform only fast in-memory operations but
+ * it may have to wait when the buffer is full and there's another buffer being flushed. Row
+ * errors can be checked by calling {@link #countPendingErrors()} and can be retrieved by calling
+ * {@link #getPendingErrors()}.
+ *
+ * <li>{@link SessionConfiguration.FlushMode#MANUAL_FLUSH MANUAL_FLUSH}:
+ * the call returns when the operation has been added to the buffer, else it throws a
+ * {@link KuduException} if the buffer is full.
+ * </ul>
+ *
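+ * For illustration, a minimal sketch of the {@code MANUAL_FLUSH} pattern; {@code client} and
+ * {@code table} are placeholders for an open {@link KuduClient} and {@link KuduTable}:
+ * <pre>{@code
+ * KuduSession session = client.newSession();
+ * session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
+ * for (Operation op : operations) {   // 'operations' is an assumed, previously built list
+ *   session.apply(op);                // buffered only, nothing is sent yet
+ * }
+ * session.flush();                    // all buffered operations are sent here
+ * }</pre>
+ *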
+ * Note: {@link PleaseThrottleException} is handled by this method and will not be thrown, unlike + * with {@link AsyncKuduSession#apply AsyncKuduSession.apply()}. + * + * @param operation operation to apply + * @return an OperationResponse for the applied Operation + * @throws KuduException if anything went wrong + * @see SessionConfiguration.FlushMode FlushMode + */ + public OperationResponse apply(Operation operation) throws KuduException { + while (true) { + try { + Deferred d = session.apply(operation); + if (getFlushMode() == FlushMode.AUTO_FLUSH_SYNC) { + return d.join(); + } + break; + } catch (PleaseThrottleException ex) { + try { + ex.getDeferred().join(); + } catch (Exception e) { + // This is the error response from the buffer that was flushing, + // we can't do much with it at this point. + LOG.error("Previous batch had this exception", e); + } + } catch (Exception e) { + throw KuduException.transformException(e); + } + } + return null; + } + + /** + * Blocking call that force flushes this session's buffers. Data is persisted when this call + * returns, else it will throw an exception. + * @return a list of OperationResponse, one per operation that was flushed + * @throws KuduException if anything went wrong + */ + public List flush() throws KuduException { + return KuduClient.joinAndHandleException(session.flush()); + } + + /** + * Blocking call that flushes the buffers (see {@link #flush()}) and closes the sessions. + * @return List of OperationResponse, one per operation that was flushed + * @throws KuduException if anything went wrong + */ + public List close() throws KuduException { + return KuduClient.joinAndHandleException(session.close()); + } + + @Override + public FlushMode getFlushMode() { + return session.getFlushMode(); + } + + @Override + public void setFlushMode(FlushMode flushMode) { + session.setFlushMode(flushMode); + } + + @Override + public void setMutationBufferSpace(int numOps, long maxSize) { + session.setMutationBufferSpace(numOps, maxSize); + } + + @Override + public void setErrorCollectorSpace(int size) { + session.setErrorCollectorSpace(size); + } + + /** + * @deprecated + */ + @Override + @Deprecated + public void setMutationBufferLowWatermark(float mutationBufferLowWatermarkPercentage) { + LOG.warn("setMutationBufferLowWatermark is deprecated"); + } + + @Override + public void setFlushInterval(int intervalMillis) { + session.setFlushInterval(intervalMillis); + } + + @Override + public long getTimeoutMillis() { + return session.getTimeoutMillis(); + } + + @Override + public void setTimeoutMillis(long timeout) { + session.setTimeoutMillis(timeout); + } + + @Override + public boolean isClosed() { + return session.isClosed(); + } + + @Override + public boolean hasPendingOperations() { + return session.hasPendingOperations(); + } + + @Override + public void setExternalConsistencyMode(ExternalConsistencyMode consistencyMode) { + session.setExternalConsistencyMode(consistencyMode); + } + + @Override + public boolean isIgnoreAllDuplicateRows() { + return session.isIgnoreAllDuplicateRows(); + } + + @Override + public void setIgnoreAllDuplicateRows(boolean ignoreAllDuplicateRows) { + session.setIgnoreAllDuplicateRows(ignoreAllDuplicateRows); + } + + @Override + public boolean isIgnoreAllNotFoundRows() { + return session.isIgnoreAllNotFoundRows(); + } + + @Override + public void setIgnoreAllNotFoundRows(boolean ignoreAllNotFoundRows) { + session.setIgnoreAllNotFoundRows(ignoreAllNotFoundRows); + } + + @Override + public int countPendingErrors() { + return 
session.countPendingErrors(); + } + + @Override + public RowErrorsAndOverflowStatus getPendingErrors() { + return session.getPendingErrors(); + } + + @Override + public ResourceMetrics getWriteOpMetrics() { + return session.getWriteOpMetrics(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTable.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTable.java new file mode 100644 index 0000000000..d8838c1aac --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTable.java @@ -0,0 +1,411 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import com.google.common.base.Predicates; +import com.google.common.collect.Iterators; +import com.stumbleupon.async.Deferred; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Schema; + +/** + * A KuduTable represents a table on a particular cluster. It holds the current + * schema of the table. Any given KuduTable instance belongs to a specific AsyncKuduClient + * instance. + * + * Upon construction, the table is looked up in the catalog (or catalog cache), + * and the schema fetched for introspection. The schema is not kept in sync with the master. + * + * This class is thread-safe. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduTable { + + private final Schema schema; + private final PartitionSchema partitionSchema; + private final AsyncKuduClient client; + private final String name; + private final String tableId; + private final int numReplicas; + private final Map extraConfig; + private final String owner; + private final String comment; + + /** + * Package-private constructor, use {@link KuduClient#openTable(String)} to get an instance. 
+ * @param client the client this instance belongs to + * @param name this table's name + * @param tableId this table's UUID + * @param schema this table's schema + * @param partitionSchema this table's partition schema + * @param numReplicas this table's replication factor + * @param extraConfig this table's extra configuration properties + * @param owner this table's owner + * @param comment this table's comment + */ + KuduTable(AsyncKuduClient client, String name, String tableId, + Schema schema, PartitionSchema partitionSchema, int numReplicas, + Map extraConfig, String owner, String comment) { + this.schema = schema; + this.partitionSchema = partitionSchema; + this.client = client; + this.name = name; + this.tableId = tableId; + this.numReplicas = numReplicas; + this.extraConfig = extraConfig; + this.owner = owner; + this.comment = comment; + } + + /** + * Get this table's schema, as of the moment this instance was created. + * @return this table's schema + */ + public Schema getSchema() { + return this.schema; + } + + /** + * Gets the table's partition schema. + * + * This method is new, and not considered stable or suitable for public use. + * + * @return the table's partition schema. + */ + @InterfaceAudience.LimitedPrivate("Impala") + @InterfaceStability.Unstable + public PartitionSchema getPartitionSchema() { + return partitionSchema; + } + + /** + * Get this table's name. + * @return this table's name + */ + public String getName() { + return this.name; + } + + /** + * Get this table's unique identifier. + * @return this table's tableId + */ + public String getTableId() { + return tableId; + } + + /** + * Get this table's replication factor. + * @return this table's replication factor + */ + public int getNumReplicas() { + return numReplicas; + } + + /** + * Get this table's extra configuration properties. + * @return this table's extra configuration properties + */ + public Map getExtraConfig() { + return extraConfig; + } + + /** + * Get this table's owner. + * @return this table's owner or an empty string if the table was created without owner on a + * version of Kudu that didn't automatically assign an owner. + */ + public String getOwner() { + return owner; + } + + /** + * Get this table's comment. + * + * @return this table's comment. + */ + public String getComment() { + return comment; + } + + /** + * Get the async client that created this instance. + * @return an async kudu client + */ + public AsyncKuduClient getAsyncClient() { + return this.client; + } + + /** + * Get a new insert configured with this table's schema. The returned object should not be reused. + * @return an insert with this table's schema + */ + public Insert newInsert() { + return new Insert(this); + } + + /** + * Get a new update configured with this table's schema. The returned object should not be reused. + * @return an update with this table's schema + */ + public Update newUpdate() { + return new Update(this); + } + + /** + * Get a new delete configured with this table's schema. The returned object should not be reused. + * @return a delete with this table's schema + */ + public Delete newDelete() { + return new Delete(this); + } + + /** + * Get a new upsert configured with this table's schema. The returned object should not be reused. 
+ * @return an upsert with this table's schema + * @throws UnsupportedOperationException if the table has auto-incrementing column + */ + public Upsert newUpsert() { + return new Upsert(this); + } + + /** + * Get a new upsert ignore configured with this table's schema. The operation ignores errors of + * updating immutable cells in a row. This is useful when upserting rows in a table with immutable + * columns. + * @return an upsert with this table's schema + * @throws UnsupportedOperationException if the table has auto-incrementing column + */ + public UpsertIgnore newUpsertIgnore() { + return new UpsertIgnore(this); + } + + /** + * Get a new insert ignore configured with this table's schema. An insert ignore will + * ignore duplicate row errors. This is useful when the same insert may be sent multiple times. + * The returned object should not be reused. + * @return an insert ignore with this table's schema + */ + public InsertIgnore newInsertIgnore() { + return new InsertIgnore(this); + } + + /** + * Get a new update ignore configured with this table's schema. An update ignore will + * ignore missing row errors and updating on immutable columns errors. This is useful to + * update a row only if it exists, or update a row with immutable columns. + * The returned object should not be reused. + * @return an update ignore with this table's schema + */ + public UpdateIgnore newUpdateIgnore() { + return new UpdateIgnore(this); + } + + /** + * Get a new delete ignore configured with this table's schema. An delete ignore will + * ignore missing row errors. This is useful to delete a row only if it exists. + * The returned object should not be reused. + * @return a delete ignore with this table's schema + */ + public DeleteIgnore newDeleteIgnore() { + return new DeleteIgnore(this); + } + + /** + * Asynchronously get all the tablets for this table. + * @param deadline max time spent in milliseconds for the deferred result of this method to + * get called back, if deadline is reached, the deferred result will get erred back + * @return a {@link Deferred} object that yields a list containing the metadata and + * locations for each of the tablets in the table + * @deprecated use the {@link KuduScanToken} API + */ + @Deprecated + public Deferred> asyncGetTabletsLocations(long deadline) { + return asyncGetTabletsLocations(null, null, deadline); + } + + /** + * Asynchronously get all or some tablets for this table. + * @param startKey where to start in the table, pass null to start at the beginning + * @param endKey where to stop in the table (exclusive), pass null to get all the tablets until + * the end of the table + * @param deadline max time spent in milliseconds for the deferred result of this method to + * get called back, if deadline is reached, the deferred result will get erred back + * @return a {@link Deferred} object that yields a list containing the metadata and locations + * for each of the tablets in the table + * @deprecated use the {@link KuduScanToken} API + */ + @Deprecated + public Deferred> asyncGetTabletsLocations(byte[] startKey, + byte[] endKey, + long deadline) { + return client.locateTable(this, startKey, endKey, + AsyncKuduClient.FETCH_TABLETS_PER_RANGE_LOOKUP, + deadline); + } + + /** + * Get all the tablets for this table. This may query the master multiple times if there + * are a lot of tablets. 
+ * @param deadline deadline in milliseconds for this method to finish + * @return a list containing the metadata and locations for each of the tablets in the + * table + * @throws Exception + * @deprecated use the {@link KuduScanToken} API + */ + @Deprecated + public List getTabletsLocations(long deadline) throws Exception { + return getTabletsLocations(null, null, deadline); + } + + /** + * Get all or some tablets for this table. This may query the master multiple times if there + * are a lot of tablets. + * This method blocks until it gets all the tablets. + * @param startKey where to start in the table, pass null to start at the beginning + * @param endKey where to stop in the table (exclusive), pass null to get all the tablets until + * the end of the table + * @param deadline deadline in milliseconds for this method to finish + * @return a list containing the metadata and locations for each of the tablets in the + * table + * @throws Exception + * @deprecated use the {@link KuduScanToken} API + */ + @Deprecated + public List getTabletsLocations(byte[] startKey, + byte[] endKey, + long deadline) throws Exception { + return client.syncLocateTable(this, startKey, endKey, + AsyncKuduClient.FETCH_TABLETS_PER_RANGE_LOOKUP, + deadline); + } + + /** + * Retrieves a formatted representation of this table's range partitions. The + * range partitions will be returned in sorted order by value, and will + * contain no duplicates. + * + * @param timeout the timeout of the operation + * @return a list of the formatted range partitions + */ + @InterfaceAudience.LimitedPrivate("Impala") + @InterfaceStability.Unstable + public List getFormattedRangePartitions(long timeout) throws Exception { + List rangePartitions = getRangePartitions(timeout); + List formattedPartitions = new ArrayList<>(); + for (Partition partition : rangePartitions) { + formattedPartitions.add(partition.formatRangePartition(this, false)); + } + return formattedPartitions; + } + + /** + * Retrieves a formatted representation of this table's range partitions along + * with hash schema output for each range. The range partitions are returned + * in sorted order by value and contain no duplicates. + * + * @param timeout the timeout of the operation + * @return a list of the formatted range partitions with hash schema for each + */ + @InterfaceAudience.LimitedPrivate("Impala") + @InterfaceStability.Unstable + public List getFormattedRangePartitionsWithHashSchema(long timeout) + throws Exception { + List rangePartitions = getRangePartitions(timeout); + List formattedPartitions = new ArrayList<>(); + for (Partition partition : rangePartitions) { + formattedPartitions.add(partition.formatRangePartition(this, true)); + } + return formattedPartitions; + } + + /** + * Retrieves this table's range partitions. The range partitions will be returned + * in sorted order by value, and will contain no duplicates. + * + * @param timeout the timeout of the operation + * @return a list of the formatted range partitions + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public List getRangePartitions(long timeout) throws Exception { + // TODO: This could be moved into the RangeSchemaPB returned from server + // to avoid an extra call to get the range partitions. + return getRangePartitionsHelper(timeout, false); + } + + /** + * Only retrieves this table's range partitions that contain the table wide hash schema. The + * range partitions will be returned in sorted order by value, and will contain no duplicates. 
+ * + * @param timeout the timeout of the operation + * @return a list of the formatted range partitions + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public List getRangePartitionsWithTableHashSchema(long timeout) throws Exception { + return getRangePartitionsHelper(timeout, true); + } + + /** + * Helper method that retrieves the table's range partitions. If onlyTableHashSchema is evaluated + * to true, then only range partitions that have the table wide hash schema will be returned. The + * range partitions will be returned in sorted order by value and will contain no duplicates. + * @param timeout the timeout of the operation + * @param onlyTableHashSchema whether to filter out the partitions with custom hash schema + * @return a list of the formatted range partitions + */ + private List getRangePartitionsHelper(long timeout, + boolean onlyTableHashSchema) throws Exception { + List rangePartitions = new ArrayList<>(); + List scanTokens = new KuduScanToken.KuduScanTokenBuilder(client, this) + .setTimeout(timeout) + .build(); + for (KuduScanToken token : scanTokens) { + Partition partition = token.getTablet().getPartition(); + // Filter duplicate range partitions by taking only the tablets whose hash + // partitions are all 0s. + if (!Iterators.all(partition.getHashBuckets().iterator(), Predicates.equalTo(0))) { + continue; + } + // If onlyTableHashSchema is true, filter out any partitions + // that are part of a range that contains a custom hash schema. + if (onlyTableHashSchema && partitionSchema.getHashSchemaForRange(partition.rangeKeyStart) != + partitionSchema.getHashBucketSchemas()) { + continue; + } + rangePartitions.add(partition); + } + return rangePartitions; + } + + /** + * Get this table's statistics. + * @return this table's statistics + */ + public KuduTableStatistics getTableStatistics() throws KuduException { + return client.syncClient().getTableStatistics(name); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTableStatistics.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTableStatistics.java new file mode 100644 index 0000000000..3ebbae95cd --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTableStatistics.java @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Represent statistics belongs to a specific kudu table. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduTableStatistics { + + private final long onDiskSize; + private final long liveRowCount; + + /** + * @param onDiskSize the table's on disk size + * @param liveRowCount the table's live row count + */ + KuduTableStatistics(long onDiskSize, + long liveRowCount) { + this.onDiskSize = onDiskSize; + this.liveRowCount = liveRowCount; + } + + /** + * Get the table's on disk size, this statistic is pre-replication. + * @return Table's on disk size + */ + public long getOnDiskSize() { + return onDiskSize; + } + + /** + * Get the table's live row count, this statistic is pre-replication. + * @return Table's live row count + */ + public long getLiveRowCount() { + return liveRowCount; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTransaction.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTransaction.java new file mode 100644 index 0000000000..ff59530430 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/KuduTransaction.java @@ -0,0 +1,817 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Preconditions; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import io.netty.util.Timeout; +import io.netty.util.TimerTask; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.transactions.Transactions; +import org.apache.kudu.transactions.Transactions.TxnTokenPB; + +/** + * A handle for a multi-row transaction in Kudu. + *
+ * Once created using {@link KuduClient#newTransaction} or + * {@link KuduTransaction#deserialize} methods, an instance of this class + * can be used to commit or rollback the underlying multi-row transaction. To + * issue write operations as a part of the transaction, use the + * {@link KuduTransaction#newKuduSession} or + * {@link KuduTransaction#newAsyncKuduSession} methods to create a new + * transactional session and apply write operations using it. + *
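+ * For illustration, a minimal sketch of the intended flow; {@code client} and {@code table}
+ * are placeholders for an open {@link KuduClient} and {@link KuduTable}:
+ * <pre>{@code
+ * try (KuduTransaction txn = client.newTransaction()) {
+ *   KuduSession session = txn.newKuduSession();
+ *   Insert insert = table.newInsert();
+ *   // ... populate the insert's row here ...
+ *   session.apply(insert);
+ *   session.flush();
+ *   txn.commit();
+ * }
+ * }</pre>
+ *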
+ * The {@link KuduTransaction} implements {@link AutoCloseable} and should be
+ * used with the try-with-resources construct. Once an object of this class
+ * is constructed, it starts sending automatic keep-alive heartbeat messages
+ * to keep the underlying transaction open. Once the object goes out of scope
+ * and {@link KuduTransaction#close} is automatically called by the Java
+ * runtime (or the method is called explicitly), the heartbeating stops and the
+ * transaction is automatically aborted by the system after not receiving
+ * heartbeat messages for a few keep-alive intervals.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+public class KuduTransaction implements AutoCloseable {
+
+ /**
+ * A utility class to help with the serialization of {@link KuduTransaction}.
+ *
+ * As of now, the single purpose of this class is to control the keepalive + * behavior for the {@link KuduTransaction} handle once it's deserialized from + * a token. In future, the list of configurable parameters might be extended + * (e.g., add commit and abort permissions, i.e. whether a handle can be used + * to commit and/or abort the underlying transaction). + */ + public static class SerializationOptions { + + private boolean enableKeepalive; + + /** + * Construct an object with default settings. + */ + SerializationOptions() { + this.enableKeepalive = false; + } + + /** + * @return whether the transaction handle produced from an instance of + * {@link KuduTransaction} by the {@link KuduTransaction#serialize}, + * {@link KuduTransaction#deserialize} call sequence will send + * keepalive messages to avoid automatic rollback of the underlying + * transaction. + */ + public boolean isKeepaliveEnabled() { + return enableKeepalive; + } + + /** + * Toggle the automatic sending of keepalive messages for transaction handle. + *
+ * This method toggles the automatic sending of keepalive messages for a + * deserialized transaction handle that is created from the result serialized + * token upon calling {@link KuduTransaction#serialize} method. + *
+ * No keepalive heartbeat messages are sent from a transaction handle whose + * source token was created with the default "keepalive disabled" setting. + * The idea here is that the most common use case for using transaction + * tokens is of the "star topology" (see below), so it is enough to have + * just one top-level handle sending keepalive messages. Overall, having more + * than one actor sending keepalive messages for a transaction is acceptable + * but it puts needless load on a cluster. + *
+ * The most common use case for a transaction's handle + * serialization/deserialization is of the "star topology": a transaction is + * started by a top-level application which sends the transaction token + * produced by serializing the original transaction handle to other worker + * applications running concurrently, where the latter write their data + * in the context of the same transaction and report back to the top-level + * application, which in its turn initiates committing the transaction + * as needed. The important point is that the top-level application keeps the + * transaction handle around all the time from the start of the transaction + * to the very point when transaction is committed. Under the hood, the + * original transaction handle sends keepalive messages as required until + * commit phase is initiated, so the deserialized transaction handles which + * are used by the worker applications don't need to send keepalive messages. + *
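+ * For illustration, a minimal sketch of the coordinator side of the "star topology";
+ * {@code client} is a placeholder for an open {@link KuduClient} and the token is shipped
+ * to the worker applications by some application-specific means:
+ * <pre>{@code
+ * KuduTransaction txn = client.newTransaction();
+ * byte[] token = txn.serialize();  // keepalive is disabled in the token by default
+ * // ... send 'token' to the workers and wait for them to finish their writes ...
+ * txn.commit();
+ * }</pre>
+ *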
+ * The other (less common) use case is of the "ring topology": a chain of + * applications work sequentially as a part of the same transaction, where + * the very first application starts the transaction, writes its data, and + * hands over the responsibility of managing the lifecycle of the transaction + * to other application down the chain. After doing so it may exit, so now + * only the next application has the active transaction handle, and so on it + * goes until the transaction is committed by the application in the end + * of the chain. In this scenario, every deserialized handle has to send + * keepalive messages to avoid automatic rollback of the transaction, + * and every application in the chain should call + * {@link SerializationOptions#setEnableKeepalive} when serializing + * its transaction handle into a transaction token to pass to the application + * next in the chain. + * + * @param enableKeepalive whether to enable sending keepalive messages for + * the {@link KuduTransaction} object once it is + * deserialized from the bytes to be produced by the + * {@link KuduTransaction#serialize} method. + */ + public SerializationOptions setEnableKeepalive(boolean enableKeepalive) { + this.enableKeepalive = enableKeepalive; + return this; + } + } + + private static final Logger LOG = + LoggerFactory.getLogger(KuduTransaction.class); + private static final SerializationOptions defaultSerializationOptions = + new SerializationOptions(); + private static final String ERRMSG_TXN_NOT_OPEN = + "transaction is not open for this handle"; + + private final AsyncKuduClient client; + private long txnId = AsyncKuduClient.INVALID_TXN_ID; + private int keepaliveMillis = 0; + private boolean keepaliveEnabled = true; + private boolean isInFlight = false; + private final Object isInFlightSync = new Object(); + private Timeout keepaliveTaskHandle = null; + private final Object keepaliveTaskHandleSync = new Object(); + private boolean isCommitStarted = false; + private final Object isCommitStartedSync = new Object(); + private List sessions = new ArrayList<>(); + + /** + * Create an instance of a transaction handle bound to the specified client. + *
+ * This constructor is used exclusively for the control paths involving + * {@link KuduClient#newTransaction} method. + * + * @param client client instance to operate with the underlying transaction + */ + KuduTransaction(AsyncKuduClient client) { + Preconditions.checkArgument(client != null); + this.client = client; + } + + /** + * Create an instance of a transaction handle for the specified parameters. + *
+ * This constructor is used exclusively for the control paths involving + * {@link KuduTransaction#deserialize}. + * + * @param client client instance to operate with the underlying transaction + * @param txnId transaction identifier + * @param keepaliveMillis keepalive timeout interval: if the backend isn't + * receiving keepalive messages at least every + * keepaliveMillis time interval, it automatically + * aborts the underlying transaction + * @param keepaliveEnabled whether the handle should automatically send + * keepalive messages to the backend + */ + KuduTransaction(AsyncKuduClient client, + long txnId, + int keepaliveMillis, + boolean keepaliveEnabled) { + Preconditions.checkArgument(client != null); + Preconditions.checkArgument(txnId > AsyncKuduClient.INVALID_TXN_ID); + Preconditions.checkArgument(keepaliveMillis >= 0); + this.client = client; + this.txnId = txnId; + this.keepaliveMillis = keepaliveMillis; + this.keepaliveEnabled = keepaliveEnabled; + + startKeepaliveHeartbeating(); + + this.isInFlight = true; + } + + /** + * Start a transaction. + *
+ * This method isn't a part of the public API, it's used only internally. + * + * @throws KuduException if something went wrong + */ + void begin() throws KuduException { + synchronized (isInFlightSync) { + // Perform a cursory state check to make sure begin() hasn't been called + // yet (this isn't intended to help if begin() is called concurrently from + // different threads, though). + Preconditions.checkState(!isInFlight); + } + + // Make corresponding call to TxnManager and process the response, + // in a synchronous way. + doBeginTransaction(); + + startKeepaliveHeartbeating(); + + // Once the heavy-lifting has successfully completed, mark this instance + // as a handle for an in-flight transaction. + synchronized (isInFlightSync) { + isInFlight = true; + } + } + + /** + * Create a new {@link AsyncKuduSession} based on this transaction. + *
+ * All write operations using the result session will be performed in the + * context of this transaction. + * + * @return a new {@link AsyncKuduSession} instance + */ + public AsyncKuduSession newAsyncKuduSession() { + synchronized (isInFlightSync) { + Preconditions.checkState(isInFlight, ERRMSG_TXN_NOT_OPEN); + } + + AsyncKuduSession session = null; + synchronized (isCommitStartedSync) { + Preconditions.checkState(!isCommitStarted, "commit already started"); + session = client.newTransactionalSession(txnId); + sessions.add(session); + } + Preconditions.checkNotNull(session); + return session; + } + + /** + * Create a new {@link KuduSession} based on this transaction. + *
+ * All write operations using the result session will be performed in the + * context of this transaction. + * + * @return a new {@link KuduSession} instance + */ + public KuduSession newKuduSession() { + return new KuduSession(newAsyncKuduSession()); + } + + /** + * Commit the multi-row distributed transaction represented by this handle. + *
+ * This method starts committing the transaction and awaits for the commit + * phase to finalize. + * + * @throws KuduException if something went wrong + */ + public void commit() throws KuduException { + commitWithMode(CommitMode.WAIT_FOR_COMPLETION); + } + + /** + * Start committing the multi-row distributed transaction represented by + * this handle. + *
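+ * For illustration, a minimal polling sketch; {@code txn} is a placeholder for an open
+ * {@link KuduTransaction} whose writes have already been flushed, and the 50 ms back-off
+ * is an arbitrary example value:
+ * <pre>{@code
+ * txn.startCommit();
+ * while (!txn.isCommitComplete()) {
+ *   Thread.sleep(50);
+ * }
+ * }</pre>
+ *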
+ * This method only starts committing the transaction, not awaiting for the + * commit phase to finalize. Use {@link KuduTransaction#isCommitComplete()} + * to check whether the transaction is committed. + * + * @throws KuduException if something went wrong upon starting to commit + */ + public void startCommit() throws KuduException { + commitWithMode(CommitMode.START_ONLY); + } + + /** + * Check whether the commit phase for a transaction is complete. + * + * @return {@code true} if transaction has finalized, otherwise {@code false} + * @throws NonRecoverableException with Status.Aborted() + * if transaction has been or is being aborted + * @throws NonRecoverableException with Status.IllegalState() + * if transaction is still open (i.e. commit() hasn't been called yet) + * @throws NonRecoverableException with Status.NotSupported() + * if transaction is in unexpected state (non-compatible backend?) + * @throws KuduException if an error happens while querying the system about + * the state of the transaction + */ + public boolean isCommitComplete() throws KuduException { + Deferred d = isTransactionCommittedAsync(); + GetTransactionStateResponse resp = KuduClient.joinAndHandleException(d); + final Transactions.TxnStatePB txnState = resp.txnState(); + if (resp.hasCommitTimestamp()) { + client.updateLastPropagatedTimestamp(resp.getCommitTimestamp()); + } + switch (txnState) { + case ABORT_IN_PROGRESS: + throw new NonRecoverableException(Status.Aborted("transaction is being aborted")); + case ABORTED: + throw new NonRecoverableException(Status.Aborted("transaction was aborted")); + case OPEN: + throw new NonRecoverableException(Status.IllegalState("transaction is still open")); + case COMMITTED: + return true; + case FINALIZE_IN_PROGRESS: + case COMMIT_IN_PROGRESS: + return false; + default: + throw new NonRecoverableException(Status.NotSupported( + "unexpected transaction state: " + txnState.toString())); + } + } + + /** + * Rollback the multi-row distributed transaction represented by this object. + *
+ * This method initiates rolling back the transaction and returns right after
+ * that. The system takes care of the rest. Once control returns and
+ * no exception is thrown, the client has a guarantee that all write
+ * operations issued in the context of this transaction cannot be seen
+ * outside.
+ *
+ * @throws KuduException if something went wrong
+ */
+ public void rollback() throws KuduException {
+   Preconditions.checkState(isInFlight, ERRMSG_TXN_NOT_OPEN);
+   doRollbackTransaction();
+   // Now, there is no need to continue sending keepalive messages.
+   synchronized (keepaliveTaskHandleSync) {
+     if (keepaliveTaskHandle != null) {
+       LOG.debug("stopping keepalive heartbeating after rollback (txn ID {})", txnId);
+       keepaliveTaskHandle.cancel();
+     }
+   }
+
+   // Once everything else is completed successfully, mark the transaction as
+   // no longer in flight.
+   synchronized (isInFlightSync) {
+     isInFlight = false;
+   }
+ }
+
+ /**
+ * Export information on the underlying transaction in a serialized form.
+ *
+ * This method transforms this handle into its serialized representation. + *
+ * The serialized information on a Kudu transaction can be passed among + * different Kudu clients running at multiple nodes, so those separate + * Kudu clients can perform operations to be a part of the same distributed + * transaction. The resulting string is referred as "transaction token" and + * it can be deserialized into a transaction handle (i.e. an object of this + * class) via the {@link KuduTransaction#deserialize} method. + *
+ * This method doesn't perform any RPC under the hood. + *
+ * The representation of the data in the serialized form (i.e. the format of + * a Kudu transaction token) is an implementation detail, not a part of the + * public API and can be changed without notice. + * + * @return the serialized form of this transaction handle + * @throws IOException if serialization fails + */ + public byte[] serialize(SerializationOptions options) throws IOException { + LOG.debug("serializing handle (txn ID {})", txnId); + Preconditions.checkState( + txnId != AsyncKuduClient.INVALID_TXN_ID, + "invalid transaction handle"); + TxnTokenPB.Builder b = TxnTokenPB.newBuilder(); + b.setTxnId(txnId); + b.setEnableKeepalive(options.isKeepaliveEnabled()); + b.setKeepaliveMillis(keepaliveMillis); + TxnTokenPB message = b.build(); + byte[] buf = new byte[message.getSerializedSize()]; + CodedOutputStream cos = CodedOutputStream.newInstance(buf); + message.writeTo(cos); + cos.flush(); + return buf; + } + + /** + * A shortcut for the {@link KuduTransaction#serialize(SerializationOptions)} + * method invoked with default-constructed {@link SerializationOptions}. + */ + public byte[] serialize() throws IOException { + return serialize(defaultSerializationOptions); + } + + /** + * Re-create KuduTransaction object given its serialized representation. + *
+ * This method doesn't perform any RPC under the hood. The newly created + * object automatically does or does not send keep-alive messages depending + * on the {@link SerializationOptions#isKeepaliveEnabled} setting when + * the original {@link KuduTransaction} object was serialized using + * {@link KuduTransaction#serialize} method. + *
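+ * For illustration, a minimal sketch of the worker side; {@code asyncClient} is a placeholder
+ * for an {@link AsyncKuduClient} connected to the same cluster, and {@code token} is the byte
+ * array received from the application that serialized the transaction:
+ * <pre>{@code
+ * KuduTransaction txn = KuduTransaction.deserialize(token, asyncClient);
+ * KuduSession session = txn.newKuduSession();
+ * // ... apply and flush write operations here; the originating application commits ...
+ * }</pre>
+ *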
+ * @param buf serialized representation of a {@link KuduTransaction} object
+ * @param client client instance to bind the resulting object to
+ * @return the deserialized {@link KuduTransaction} object
+ * @throws IOException if deserialization fails
+ */
+ public static KuduTransaction deserialize(
+     byte[] buf, AsyncKuduClient client) throws IOException {
+   TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf));
+   final long txnId = pb.getTxnId();
+   final int keepaliveMillis = pb.getKeepaliveMillis();
+   final boolean keepaliveEnabled =
+       pb.hasEnableKeepalive() && pb.getEnableKeepalive();
+   return new KuduTransaction(client, txnId, keepaliveMillis, keepaliveEnabled);
+ }
+
+ /**
+ * Stop keepalive heartbeating, if any was in progress for this transaction
+ * handle.
+ *
+ * This method is called automatically when the object goes out of scope + * as prescribed for {@link AutoCloseable}. + *
+ * This method doesn't throw according to the recommendations for + * {@link AutoCloseable#close}. In case of an error, this method just logs + * the corresponding error message. + */ + @Override + public void close() { + try { + synchronized (keepaliveTaskHandleSync) { + if (keepaliveTaskHandle != null) { + LOG.debug("stopping keepalive heartbeating (txn ID {})", txnId); + keepaliveTaskHandle.cancel(); + } + } + } catch (Exception e) { + LOG.error("exception while automatically rolling back a transaction", e); + } + } + + private void doBeginTransaction() throws KuduException { + BeginTransactionRequest request = new BeginTransactionRequest( + client.getMasterTable(), + client.getTimer(), + client.getDefaultAdminOperationTimeoutMs()); + Deferred d = client.sendRpcToTablet(request); + BeginTransactionResponse resp = KuduClient.joinAndHandleException(d); + txnId = resp.txnId(); + keepaliveMillis = resp.keepaliveMillis(); + } + + private void doRollbackTransaction() throws KuduException { + AbortTransactionRequest request = new AbortTransactionRequest( + client.getMasterTable(), + client.getTimer(), + client.getDefaultAdminOperationTimeoutMs(), + txnId); + Deferred d = client.sendRpcToTablet(request); + KuduClient.joinAndHandleException(d); + } + + private CommitTransactionRequest doCommitTransaction() throws KuduException { + CommitTransactionRequest request = new CommitTransactionRequest( + client.getMasterTable(), + client.getTimer(), + client.getDefaultAdminOperationTimeoutMs(), + txnId); + Deferred d = client.sendRpcToTablet(request); + KuduClient.joinAndHandleException(d); + return request; + } + + /** + * Transaction commit mode. + */ + private enum CommitMode { + /** Only start/initiate the commit phase, don't wait for the completion. */ + START_ONLY, + + /** Start the commit phase and wait until it succeeds or fails. */ + WAIT_FOR_COMPLETION, + } + + private void commitWithMode(CommitMode mode) throws KuduException { + synchronized (isInFlightSync) { + Preconditions.checkState(isInFlight, ERRMSG_TXN_NOT_OPEN); + } + synchronized (isCommitStartedSync) { + isCommitStarted = true; + } + for (AsyncKuduSession s : sessions) { + if (mode == CommitMode.WAIT_FOR_COMPLETION) { + // Flush each session's pending operations. + List results = + KuduClient.joinAndHandleException(s.flush()); + for (OperationResponse result : results) { + if (result.hasRowError()) { + throw new NonRecoverableException(Status.Incomplete(String.format( + "failed to flush a transactional session: %s", + result.getRowError().toString()))); + } + } + } else { + // Make sure no write operations are pending in any of the transactional + // sessions, i.e. everything has been flushed. This is rather a cursory + // check, it's not intended to protect against concurrent activity on + // transaction sessions when startCommit() is being called. + if (s.hasPendingOperations()) { + throw new NonRecoverableException(Status.IllegalState( + "cannot start committing transaction: at least one " + + "transactional session has write operations pending")); + } + } + } + CommitTransactionRequest req = doCommitTransaction(); + // Now, there is no need to continue sending keepalive messages: the + // transaction should be in COMMIT_IN_PROGRESS state after successful + // completion of the calls above, and the backend takes care of everything + // else: nothing is required from the client side to successfully complete + // the commit phase of the transaction past this point. 
+ synchronized (keepaliveTaskHandleSync) { + if (keepaliveTaskHandle != null) { + LOG.debug("stopping keepalive heartbeating after initiating commit (txn ID {})", txnId); + keepaliveTaskHandle.cancel(); + } + } + + if (mode == CommitMode.WAIT_FOR_COMPLETION) { + Deferred txnState = + getDelayedIsTransactionCommittedDeferred(req); + KuduClient.joinAndHandleException(txnState); + } + + // Once everything else is completed successfully, mark the transaction as + // no longer in flight. + synchronized (isInFlightSync) { + isInFlight = false; + } + } + + private Deferred isTransactionCommittedAsync() { + GetTransactionStateRequest request = new GetTransactionStateRequest( + client.getMasterTable(), + client.getTimer(), + client.getDefaultAdminOperationTimeoutMs(), + txnId); + return client.sendRpcToTablet(request); + } + + Deferred getDelayedIsTransactionCommittedDeferred( + KuduRpc parent) { + // TODO(aserbin): By scheduling even the first RPC via timer, the sequence of + // RPCs is delayed by at least one timer tick, which is unfortunate for the + // case where the transaction is fully committed. + // + // Eliminating the delay by sending the first RPC immediately (and + // scheduling the rest via timer) would also allow us to replace this "fake" + // RPC with a real one. + KuduRpc fakeRpc = client.buildFakeRpc( + "GetTransactionState", parent); + + // Store the Deferred locally; callback() or errback() on the RPC will + // reset it and we'd return a different, non-triggered Deferred. + Deferred fakeRpcD = fakeRpc.getDeferred(); + + delayedIsTransactionCommitted( + fakeRpc, + isTransactionCommittedCb(fakeRpc), + isTransactionCommittedErrb(fakeRpc)); + return fakeRpcD; + } + + private void delayedIsTransactionCommitted( + final KuduRpc rpc, + final Callback, + GetTransactionStateResponse> callback, + final Callback errback) { + final class RetryTimer implements TimerTask { + @Override + public void run(final Timeout timeout) { + isTransactionCommittedAsync().addCallbacks(callback, errback); + } + } + + long sleepTimeMillis = client.getSleepTimeForRpcMillis(rpc); + if (rpc.timeoutTracker.wouldSleepingTimeoutMillis(sleepTimeMillis)) { + AsyncKuduClient.tooManyAttemptsOrTimeout(rpc, null); + return; + } + AsyncKuduClient.newTimeout(client.getTimer(), new RetryTimer(), sleepTimeMillis); + } + + /** + * Returns a callback to be called upon completion of GetTransactionState RPC. + * If the transaction is committed, triggers the provided RPC's callback chain + * with 'txnResp' as its value. Otherwise, sends another GetTransactionState + * RPC after sleeping. + *
+ * @param rpc RPC that initiated this sequence of operations + * @return callback that will eventually return 'txnResp' + */ + private Callback, GetTransactionStateResponse> + isTransactionCommittedCb(final KuduRpc rpc) { + return resp -> { + if (resp.hasCommitTimestamp()) { + client.updateLastPropagatedTimestamp(resp.getCommitTimestamp()); + } + // Store the Deferred locally; callback() below will reset it and we'd + // return a different, non-triggered Deferred. + Deferred d = rpc.getDeferred(); + if (resp.isCommitted()) { + rpc.callback(resp); + } else if (resp.isAborted()) { + rpc.errback(new NonRecoverableException( + Status.Aborted("transaction was aborted"))); + } else { + rpc.attempt++; + delayedIsTransactionCommitted( + rpc, + isTransactionCommittedCb(rpc), + isTransactionCommittedErrb(rpc)); + } + return d; + }; + } + + private Callback isTransactionCommittedErrb( + final KuduRpc rpc) { + return e -> { + rpc.errback(e); + return e; + }; + } + + /** + * Return period for sending keepalive messages for the specified keepalive + * timeout (both in milliseconds). The latter is dictated by the backend + * which can automatically rollback a transaction after not receiving + * keepalive messages for longer than the specified timeout interval. + * Ideally, it would be enough to send a heartbeat message every + * {@code keepaliveMillis} interval, but given scheduling irregularities, + * client node timer's precision, and various network delays and latencies, + * it's safer to schedule sending keepalive messages from the client side + * more frequently. + * + * @param keepaliveMillis the keepalive timeout interval + * @return a proper period for sending keepalive messages from the client side + */ + private static long keepalivePeriodForTimeout(long keepaliveMillis) { + Preconditions.checkArgument(keepaliveMillis > 0, + "keepalive timeout must be a positive number"); + long period = keepaliveMillis / 2; + if (period <= 0) { + period = 1; + } + return period; + } + + /** + * Return timeout for sending heartbeat messages given the specified + * keepalive timeout for a transaction (both in milliseconds). If something + * goes wrong and keepalive RPC fails, it should be possible to retry sending + * keepalive message a couple of times before the transaction is automatically + * aborted by the backend after not receiving keepalive messages for longer + * than the keepalive timeout for the transaction. 
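+   * For example, with a transaction keepalive timeout of 30000 milliseconds
+   * the client sends keepalive messages about every 15000 milliseconds (see
+   * keepalivePeriodForTimeout()) and uses a 7500 millisecond timeout for each
+   * keepalive RPC, leaving room for a couple of retries before the backend
+   * would abort the transaction.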
+ * + * @param keepaliveMillis keepalive timeout interval for a transaction (ms) + * @return a proper timeout for sending keepalive RPC from the client side + */ + private static long keepaliveRequestTimeout(long keepaliveMillis) { + long timeout = keepalivePeriodForTimeout(keepaliveMillis) / 2; + if (timeout <= 0) { + timeout = 1; + } + return timeout; + } + + private void startKeepaliveHeartbeating() { + if (keepaliveEnabled) { + LOG.debug("starting keepalive heartbeating with period {} ms (txn ID {})", + keepalivePeriodForTimeout(keepaliveMillis), txnId); + doStartKeepaliveHeartbeating(); + } else { + LOG.debug("keepalive heartbeating disabled for this handle (txn ID {})", txnId); + } + } + + void doStartKeepaliveHeartbeating() { + Preconditions.checkState(keepaliveEnabled); + Preconditions.checkArgument(txnId > AsyncKuduClient.INVALID_TXN_ID); + synchronized (keepaliveTaskHandleSync) { + Preconditions.checkState(keepaliveTaskHandle == null, + "keepalive heartbeating has already started"); + long sleepTimeMillis = keepalivePeriodForTimeout(keepaliveMillis); + keepaliveTaskHandle = delayedSendKeepTransactionAlive(sleepTimeMillis, + getSendKeepTransactionAliveCB(), getSendKeepTransactionAliveEB()); + } + } + + /** + * Send keepalive message to TxnManager for this transaction. + * + * @return a future object to handle the results of the sent RPC + */ + private Deferred doSendKeepTransactionAlive() { + // The timeout for the keepalive RPC is dictated by the keepalive + // timeout for the transaction. + long timeoutMs = keepaliveRequestTimeout(keepaliveMillis); + KeepTransactionAliveRequest request = new KeepTransactionAliveRequest( + client.getMasterTable(), client.getTimer(), timeoutMs, txnId); + return client.sendRpcToTablet(request); + } + + /** + * Schedule a timer to send a KeepTransactionAlive RPC to TxnManager after + * sleepTimeMillis milliseconds. + * + * @param runAfterMillis time delta from now when to run the task + * @param callback callback to call on successfully sent RPC + * @param errback errback to call if something goes wrong with sending RPC + */ + private Timeout delayedSendKeepTransactionAlive( + long runAfterMillis, + final Callback callback, + final Callback errback) { + + final class RetryTimer implements TimerTask { + @Override + public void run(final Timeout timeout) { + doSendKeepTransactionAlive().addCallbacks(callback, errback); + } + } + + return AsyncKuduClient.newTimeout( + client.getTimer(), new RetryTimer(), runAfterMillis); + } + + private Callback getSendKeepTransactionAliveCB() { + // Time interval to wait before sending next KeepTransactionAlive request. + long sleepTimeMillis = keepalivePeriodForTimeout(keepaliveMillis); + return resp -> { + // Store the Deferred locally; callback() below will reset it and we'd + // return a different, non-triggered Deferred. + synchronized (keepaliveTaskHandleSync) { + if (!keepaliveTaskHandle.isCancelled()) { + keepaliveTaskHandle = delayedSendKeepTransactionAlive( + sleepTimeMillis, + getSendKeepTransactionAliveCB(), + getSendKeepTransactionAliveEB()); + } + } + return null; + }; + } + + private Callback getSendKeepTransactionAliveEB() { + return e -> { + boolean scheduleNextRun = false; + long nextRunAfterMillis = -1; + if (e instanceof RecoverableException) { + scheduleNextRun = true; + nextRunAfterMillis = keepaliveRequestTimeout(keepaliveMillis); + // Continue sending heartbeats as required: the recoverable exception + // means the condition is transient. 
However, attempt sending next + // keepalive message sooner since one has just been missed. + LOG.debug("continuing keepalive heartbeating (txn ID {}): {}", + txnId, e.toString()); + } else if (e instanceof NonRecoverableException) { + NonRecoverableException ex = (NonRecoverableException) e; + if (ex.getStatus().isTimedOut()) { + // Send next keepalive message sooner: it's been a long timeout. + scheduleNextRun = true; + nextRunAfterMillis = 1; + LOG.debug("sending keepalive message after prior one timed out (txn ID {}): {}", + txnId, e.toString()); + } else { + LOG.debug("terminating keepalive task (txn ID {}) due to exception {}", + txnId, e.toString()); + } + } + if (scheduleNextRun) { + Preconditions.checkArgument(nextRunAfterMillis >= 0); + synchronized (keepaliveTaskHandleSync) { + if (!keepaliveTaskHandle.isCancelled()) { + keepaliveTaskHandle = delayedSendKeepTransactionAlive( + nextRunAfterMillis, + getSendKeepTransactionAliveCB(), + getSendKeepTransactionAliveEB()); + } + } + } + return null; + }; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTablesRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTablesRequest.java new file mode 100644 index 0000000000..698a4e5218 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTablesRequest.java @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.client.ListTablesResponse.TableInfo; +import org.apache.kudu.master.Master; +import org.apache.kudu.util.Pair; + +@InterfaceAudience.Private +class ListTablesRequest extends KuduRpc { + + private final String nameFilter; + + private final boolean showSoftDeleted; + + ListTablesRequest(KuduTable masterTable, + String nameFilter, + boolean showSoftDeleted, + Timer timer, + long timeoutMillis) { + super(masterTable, timer, timeoutMillis); + this.nameFilter = nameFilter; + this.showSoftDeleted = showSoftDeleted; + } + + @Override + Message createRequestPB() { + final Master.ListTablesRequestPB.Builder builder = + Master.ListTablesRequestPB.newBuilder(); + if (nameFilter != null) { + builder.setNameFilter(nameFilter); + } + builder.setShowSoftDeleted(showSoftDeleted); + return builder.build(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return "ListTables"; + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + final Master.ListTablesResponsePB.Builder respBuilder = + Master.ListTablesResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + int tablesCount = respBuilder.getTablesCount(); + List tableInfos = new ArrayList<>(tablesCount); + for (Master.ListTablesResponsePB.TableInfo infoPb : respBuilder.getTablesList()) { + tableInfos.add(new TableInfo(infoPb.getId().toStringUtf8(), infoPb.getName())); + } + ListTablesResponse response = new ListTablesResponse(timeoutTracker.getElapsedMillis(), + tsUUID, tableInfos); + return new Pair( + response, respBuilder.hasError() ? respBuilder.getError() : null); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTablesResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTablesResponse.java new file mode 100644 index 0000000000..ef7fd27d52 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTablesResponse.java @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ListTablesResponse extends KuduRpcResponse { + + private final List tableInfosList; + private final List tablesList; + + ListTablesResponse(long elapsedMillis, String tsUUID, List tableInfosList) { + super(elapsedMillis, tsUUID); + List tablesList = new ArrayList<>(); + for (TableInfo info : tableInfosList) { + tablesList.add(info.getTableName()); + } + this.tableInfosList = tableInfosList; + this.tablesList = tablesList; + } + + /** + * Get the list of tables as specified in the request. + * @return a list of table names + */ + public List getTablesList() { + return tablesList; + } + + /** + * Get the list of tables as specified in the request. + * @return a list of TableInfo + */ + public List getTableInfosList() { + return tableInfosList; + } + + public static class TableInfo { + private final String tableId; + private final String tableName; + + TableInfo(String tableId, String tableName) { + this.tableId = tableId; + this.tableName = tableName; + } + + /** + * @return the table id + */ + public String getTableId() { + return tableId; + } + + /** + * @return the table name + */ + public String getTableName() { + return tableName; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof TableInfo)) { + return false; + } + TableInfo tableInfo = (TableInfo) o; + return Objects.equal(tableId, tableInfo.tableId) && + Objects.equal(tableName, tableInfo.tableName); + } + + @Override + public int hashCode() { + return Objects.hashCode(tableId, tableName); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("tableId", tableId) + .add("tableName", tableName) + .toString(); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletServersRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletServersRequest.java new file mode 100644 index 0000000000..9451c790c7 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletServersRequest.java @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.master.Master.ListTabletServersRequestPB; +import static org.apache.kudu.master.Master.ListTabletServersResponsePB; + +import java.util.ArrayList; +import java.util.List; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.util.Pair; + +@InterfaceAudience.Private +public class ListTabletServersRequest extends KuduRpc { + + public ListTabletServersRequest(KuduTable masterTable, + Timer timer, + long timeoutMillis) { + super(masterTable, timer, timeoutMillis); + } + + @Override + Message createRequestPB() { + return ListTabletServersRequestPB.getDefaultInstance(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return "ListTabletServers"; + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + final ListTabletServersResponsePB.Builder respBuilder = + ListTabletServersResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + int serversCount = respBuilder.getServersCount(); + List servers = new ArrayList<>(serversCount); + for (ListTabletServersResponsePB.Entry entry : respBuilder.getServersList()) { + servers.add(entry.getRegistration().getRpcAddresses(0).getHost()); + } + ListTabletServersResponse response = + new ListTabletServersResponse(timeoutTracker.getElapsedMillis(), + tsUUID, + serversCount, + servers); + return new Pair( + response, respBuilder.hasError() ? respBuilder.getError() : null); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletServersResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletServersResponse.java new file mode 100644 index 0000000000..fc979e46c7 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletServersResponse.java @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.List; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ListTabletServersResponse extends KuduRpcResponse { + + private final int tabletServersCount; + private final List tabletServersList; + + /** + * @param elapsedMillis Time in milliseconds since RPC creation to now. + * @param tabletServersCount How many tablet servers the master is reporting. + * @param tabletServersList List of tablet servers. 
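+   * @param tsUUID UUID of the server that sent the response.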
+ */ + ListTabletServersResponse(long elapsedMillis, String tsUUID, + int tabletServersCount, List tabletServersList) { + super(elapsedMillis, tsUUID); + this.tabletServersCount = tabletServersCount; + this.tabletServersList = tabletServersList; + } + + /** + * Get the count of tablet servers as reported by the master. + * @return TS count. + */ + public int getTabletServersCount() { + return tabletServersCount; + } + + /** + * Get the list of tablet servers, as represented by their hostname. + * @return List of hostnames, one per TS. + */ + public List getTabletServersList() { + return tabletServersList; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletsRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletsRequest.java new file mode 100644 index 0000000000..7010bf3791 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletsRequest.java @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.tserver.Tserver; +import org.apache.kudu.util.Pair; + +@InterfaceAudience.Private +class ListTabletsRequest extends KuduRpc { + + ListTabletsRequest(Timer timer, long timeoutMillis) { + super(null, timer, timeoutMillis); + } + + @Override + Message createRequestPB() { + return Tserver.ListTabletsRequestPB.getDefaultInstance(); + } + + @Override + String serviceName() { + return TABLET_SERVER_SERVICE_NAME; + } + + @Override + String method() { + return "ListTablets"; + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + final Tserver.ListTabletsResponsePB.Builder respBuilder = + Tserver.ListTabletsResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + int serversCount = respBuilder.getStatusAndSchemaCount(); + List tablets = new ArrayList<>(serversCount); + for (Tserver.ListTabletsResponsePB.StatusAndSchemaPB info + : respBuilder.getStatusAndSchemaList()) { + tablets.add(info.getTabletStatus().getTabletId()); + } + ListTabletsResponse response = new ListTabletsResponse(timeoutTracker.getElapsedMillis(), + tsUUID, + tablets); + return new Pair<>(response, respBuilder.hasError() ? 
respBuilder.getError() : null); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletsResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletsResponse.java new file mode 100644 index 0000000000..4ad6b7d8e6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ListTabletsResponse.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.List; + +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class ListTabletsResponse extends KuduRpcResponse { + + private final List tabletsList; + + ListTabletsResponse(long elapsedMillis, String tsUUID, List tabletsList) { + super(elapsedMillis, tsUUID); + this.tabletsList = tabletsList; + } + + /** + * Get the list of tablets as specified in the request. + * @return a list of tablet uuids + */ + public List getTabletsList() { + return tabletsList; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/LocatedTablet.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/LocatedTablet.java new file mode 100644 index 0000000000..8003a157bc --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/LocatedTablet.java @@ -0,0 +1,152 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.kudu.client; + +import java.util.List; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.consensus.Metadata.RaftPeerPB.Role; + +/** + * Information about the locations of tablets in a Kudu table. + * This should be treated as immutable data (it does not reflect + * any updates the client may have heard since being constructed). 
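+ * Instances are typically obtained through the client's tablet location
+ * lookups (for example via {@code KuduTable#getTabletsLocations}, assuming
+ * that accessor is available in this version of the client).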
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class LocatedTablet { + private final Partition partition; + private final byte[] tabletId; + + private final List replicas; + + @InterfaceAudience.LimitedPrivate("Tests") + public LocatedTablet(RemoteTablet tablet) { + partition = tablet.getPartition(); + tabletId = tablet.getTabletIdAsBytes(); + replicas = tablet.getReplicas(); + } + + public List getReplicas() { + return replicas; + } + + @InterfaceAudience.LimitedPrivate("Impala") + public Partition getPartition() { + return partition; + } + + /** + * @deprecated use {@link #getPartition()} + */ + @Deprecated + public byte[] getStartKey() { + return getPartition().getPartitionKeyStart(); + } + + /** + * @deprecated use {@link #getPartition()} + */ + @Deprecated() + public byte[] getEndKey() { + return getPartition().getPartitionKeyEnd(); + } + + public byte[] getTabletId() { + return tabletId; + } + + /** + * Return the current leader, or null if there is none. + */ + public Replica getLeaderReplica() { + return getOneOfRoleOrNull(Role.LEADER); + } + + /** + * Return the first occurrence for the given role, or null if there is none. + */ + private Replica getOneOfRoleOrNull(Role role) { + for (Replica r : replicas) { + if (r.getRoleAsEnum().equals(role)) { + return r; + } + } + return null; + } + + @Override + public String toString() { + return Bytes.pretty(tabletId) + " " + partition.toString(); + } + + /** + * One of the replicas of the tablet. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class Replica { + private final String host; + private final Integer port; + private final Role role; + private final String dimensionLabel; + + Replica(String host, Integer port, Role role, String dimensionLabel) { + this.host = host; + this.port = port; + this.role = role; + this.dimensionLabel = dimensionLabel; + } + + public String getRpcHost() { + return host; + } + + public Integer getRpcPort() { + return port; + } + + Role getRoleAsEnum() { + return role; + } + + public String getRole() { + return role.toString(); + } + + public String getDimensionLabel() { + return dimensionLabel; + } + + @Override + public String toString() { + final StringBuilder buf = new StringBuilder(); + buf.append("Replica(host=").append(host == null ? "null" : host); + buf.append(", port=").append(port == null ? "null" : port.toString()); + buf.append(", role=").append(getRole()); + buf.append(", dimensionLabel=").append(dimensionLabel == null ? "null" : dimensionLabel); + buf.append(')'); + return buf.toString(); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Negotiator.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Negotiator.java new file mode 100644 index 0000000000..1099a15288 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Negotiator.java @@ -0,0 +1,1058 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the aabove copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.kudu.client; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; +import java.security.cert.Certificate; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import javax.net.ssl.SSLEngine; +import javax.net.ssl.SSLException; +import javax.net.ssl.SSLPeerUnverifiedException; +import javax.security.auth.Subject; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.auth.kerberos.KerberosTicket; +import javax.security.sasl.Sasl; +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import com.google.protobuf.UnsafeByteOperations; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandlerAdapter; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.SimpleChannelInboundHandler; +import io.netty.channel.embedded.EmbeddedChannel; +import io.netty.handler.ssl.SslHandler; +import io.netty.util.concurrent.Future; +import org.apache.yetus.audience.InterfaceAudience; +import org.ietf.jgss.GSSException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.rpc.RpcHeader; +import org.apache.kudu.rpc.RpcHeader.AuthenticationTypePB; +import org.apache.kudu.rpc.RpcHeader.NegotiatePB; +import org.apache.kudu.rpc.RpcHeader.NegotiatePB.NegotiateStep; +import org.apache.kudu.rpc.RpcHeader.RpcFeatureFlag; +import org.apache.kudu.security.Token.JwtRawPB; +import org.apache.kudu.security.Token.SignedTokenPB; +import org.apache.kudu.util.SecurityUtil; + +/** + * Netty Pipeline handler which runs connection negotiation with + * the server. 
When negotiation is complete, this removes itself + * from the pipeline and fires a Negotiator.Success or Negotiator.Failure upstream. + */ +@InterfaceAudience.Private +public class Negotiator extends SimpleChannelInboundHandler { + private static final Logger LOG = LoggerFactory.getLogger(Negotiator.class); + + private final SaslClientCallbackHandler saslCallback = new SaslClientCallbackHandler(); + private static final ImmutableSet SUPPORTED_RPC_FEATURES = + ImmutableSet.of( + RpcHeader.RpcFeatureFlag.APPLICATION_FEATURE_FLAGS, + RpcHeader.RpcFeatureFlag.TLS); + + /** + * Set of SASL mechanisms supported by the client, in descending priority order. + * The client will pick the first of these mechanisms that is supported by + * the server and also succeeds to initialize. + */ + private enum SaslMechanism { + GSSAPI, + PLAIN, + } + + static final int CONNECTION_CTX_CALL_ID = -3; + static final int SASL_CALL_ID = -33; + + /** + * The cipher suites, in order of our preference. + * + * This list is based on the kDefaultTls13Ciphers and kDefaultTlsCiphers lists + * in security_flags.cc: see that file for details on how it was derived. + * + * For the mapping between IANA and OpenSSL cipher names, run + * `openssl ciphers -stdname` on OpenSSL 1.1.1 (and newer) or see + * https://www.openssl.org/docs/man1.1.1/man1/ciphers.html + * https://wiki.mozilla.org/Security/Cipher_Suites + * + * For information on TLSv1.3 (JEP 332) and appropriate ciphers in Java 8 + * updates, see + * https://www.oracle.com/java/technologies/javase/8all-relnotes.html + */ + static final String[] PREFERRED_CIPHER_SUITES = new String[] { + "TLS_AES_128_GCM_SHA256", // TLSv1.3: Java 8 updates (8u261), Java 11 + "TLS_AES_256_GCM_SHA384", // TLSv1.3: Java 8 updates (8u261), Java 11 + "TLS_CHACHA20_POLY1305_SHA256", // TLSv1.3: Java 12 + "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",// TLSv1.2: Java 8 + "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", // TLSv1.2: Java 8 + "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",// TLSv1.2: Java 8 + "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", // TLSv1.2: Java 8 + "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256", // TLSv1.2: Java 12 + "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256", // TLSv1.2: Java 12 + // The following AES CBC ciphers are present to be around when no AES GCM + // ciphers are available (that's so for some FIPS 140-2 environments). + "TLS_ECDHE_ECDSA_WITH_AES_128_CCM", // TLSv1.2: custom JSSE providers + "TLS_ECDHE_ECDSA_WITH_AES_256_CCM", // TLSv1.2: custom JSSE providers + "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",// TLSv1.2: Java 7 + "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", // TLSv1.2: Java 7 + "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384",// TLSv1.2: Java 7 + "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384", // TLSv1.2: Java 7 + }; + + /** + * TLS protocols to enable among those supported by SSLEngine. + * This list is based on the kDefaultTlsMinVersion in security_flags.cc. + */ + static final String[] PREFERRED_PROTOCOLS = new String[]{ + "TLSv1.3", // Java 8 updates (8u261), Java 11 + "TLSv1.2", // Java 8 + }; + + private enum State { + INITIAL, + AWAIT_NEGOTIATE, + AWAIT_TLS_HANDSHAKE, + AWAIT_AUTHN_TOKEN_EXCHANGE, + AWAIT_JWT_EXCHANGE, + AWAIT_SASL, + FINISHED + } + + /** The remote hostname we're connecting to, used by TLS and GSSAPI */ + private final String remoteHostname; + /** The security context holding the client credentials */ + private final SecurityContext securityContext; + /** + * The authentication token we'll try to connect with, maybe null. 
+ * This is fetched from {@link #securityContext} in the constructor to + * ensure that it doesn't change over the course of a negotiation attempt. + * An authentication token is used as secondary credentials. + */ + private final SignedTokenPB authnToken; + /** + * A JSON Web Token (JWT) to authenticate this client/actor to the server + * that we'll try to connect with. Similar to {@link #authnToken}, this token + * may be null, and it's fetched from {@link #securityContext} as well. + * Cannot change over the course of an RPC connection negotiation attempt. + * + * @note unlike {@link #authnToken}, {@link #jsonWebToken} is used as primary credentials + */ + private final JwtRawPB jsonWebToken; + + private enum AuthnTokenNotUsedReason { + NONE_AVAILABLE("no token is available"), + NO_TRUSTED_CERTS("no TLS certificates are trusted by the client"), + FORBIDDEN_BY_POLICY("this connection will be used to acquire a new token and " + + "therefore requires primary credentials"), + NOT_CHOSEN_BY_SERVER("the server chose not to accept token authentication"); + + AuthnTokenNotUsedReason(String msg) { + this.msg = msg; + } + + final String msg; + } + + private AuthnTokenNotUsedReason authnTokenNotUsedReason = null; + + private State state = State.INITIAL; + private SaslClient saslClient; + + /** The negotiated mechanism, set after NEGOTIATE stage. */ + private SaslMechanism chosenMech; + + /** The negotiated authentication type, set after NEGOTIATE state. */ + private AuthenticationTypePB.TypeCase chosenAuthnType; + + /** The features supported by the server, set after NEGOTIATE stage. */ + private Set serverFeatures; + + /** + * The negotiation protocol relies on tunneling the TLS handshake through + * protobufs. The embedder holds a Netty SslHandler which can perform the + * handshake. Once the handshake is complete, we will stop using the embedder + * and add the handler directly to the real ChannelPipeline. + * Only non-null once TLS is initiated. + */ + private EmbeddedChannel sslEmbedder; + + /** + * The nonce sent from the server to the client, or null if negotiation has + * not yet taken place, or the server does not send a nonce. + */ + private byte[] nonce; + + /** + * Future indicating whether the embedded handshake has completed. + * Only non-null once TLS is initiated. 
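+   * The future comes from the embedded SSL handler once the TLS handshake is
+   * started (see startTlsHandshake()).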
+ */ + private Future sslHandshakeFuture; + + private Certificate peerCert; + + private final String saslProtocolName; + + private final boolean requireAuthentication; + + private final boolean requireEncryption; + + private final boolean encryptLoopback; + + @InterfaceAudience.LimitedPrivate("Test") + boolean overrideLoopbackForTests; + + public Negotiator(String remoteHostname, + SecurityContext securityContext, + boolean ignoreAuthnToken, + String saslProtocolName, + boolean requireAuthentication, + boolean requireEncryption, + boolean encryptLoopback) { + this.remoteHostname = remoteHostname; + this.securityContext = securityContext; + this.saslProtocolName = saslProtocolName; + this.requireAuthentication = requireAuthentication; + this.requireEncryption = requireEncryption; + this.encryptLoopback = encryptLoopback; + + SignedTokenPB token = securityContext.getAuthenticationToken(); + if (token != null) { + if (ignoreAuthnToken) { + this.authnToken = null; + this.authnTokenNotUsedReason = AuthnTokenNotUsedReason.FORBIDDEN_BY_POLICY; + } else if (!securityContext.hasTrustedCerts()) { + this.authnToken = null; + this.authnTokenNotUsedReason = AuthnTokenNotUsedReason.NO_TRUSTED_CERTS; + } else { + this.authnToken = token; + } + } else { + this.authnToken = null; + this.authnTokenNotUsedReason = AuthnTokenNotUsedReason.NONE_AVAILABLE; + } + + JwtRawPB jwt = securityContext.getJsonWebToken(); + if (jwt != null && securityContext.hasTrustedCerts()) { + this.jsonWebToken = jwt; + } else { + this.jsonWebToken = null; + } + } + + public void sendHello(ChannelHandlerContext ctx) { + sendNegotiateMessage(ctx); + } + + private void sendNegotiateMessage(ChannelHandlerContext ctx) { + RpcHeader.NegotiatePB.Builder builder = RpcHeader.NegotiatePB.newBuilder() + .setStep(RpcHeader.NegotiatePB.NegotiateStep.NEGOTIATE); + + // Advertise our supported features + // ---------------------------------- + for (RpcHeader.RpcFeatureFlag flag : SUPPORTED_RPC_FEATURES) { + builder.addSupportedFeatures(flag); + } + if (isLoopbackConnection(ctx.channel()) && !encryptLoopback) { + builder.addSupportedFeatures(RpcFeatureFlag.TLS_AUTHENTICATION_ONLY); + } + + // Advertise our authentication types. + // ---------------------------------- + // We always advertise SASL. + builder.addAuthnTypesBuilder().setSasl( + AuthenticationTypePB.Sasl.getDefaultInstance()); + + // We may also have a token. But, we can only use the token + // if we are able to use authenticated TLS to authenticate the server. + if (authnToken != null) { + builder.addAuthnTypesBuilder().setToken( + AuthenticationTypePB.Token.getDefaultInstance()); + } + + // We may also have a JSON Web token, but it can be sent to the server + // only if we can verify the server's authenticity and the channel between + // this client and the server is confidential, i.e. it's protected by + // authenticated TLS. + if (jsonWebToken != null) { + builder.addAuthnTypesBuilder().setJwt( + AuthenticationTypePB.Jwt.getDefaultInstance()); + } + + // We currently don't support client-certificate authentication from the + // Java client. 
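+    // At this point the NEGOTIATE message advertises SASL unconditionally,
+    // plus TOKEN and/or JWT when the corresponding credentials are present
+    // and usable.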
+ + state = State.AWAIT_NEGOTIATE; + sendSaslMessage(ctx, builder.build()); + } + + @SuppressWarnings("FutureReturnValueIgnored") + private void sendSaslMessage(ChannelHandlerContext ctx, RpcHeader.NegotiatePB msg) { + RpcHeader.RequestHeader.Builder builder = RpcHeader.RequestHeader.newBuilder(); + builder.setCallId(SASL_CALL_ID); + ctx.writeAndFlush(new RpcOutboundMessage(builder, msg), ctx.voidPromise()); + } + + @Override + public void channelRead0(ChannelHandlerContext ctx, CallResponse msg) throws IOException { + final RpcHeader.ResponseHeader header = msg.getHeader(); + if (header.getIsError()) { + final RpcHeader.ErrorStatusPB.Builder errBuilder = RpcHeader.ErrorStatusPB.newBuilder(); + KuduRpc.readProtobuf(msg.getPBMessage(), errBuilder); + final RpcHeader.ErrorStatusPB error = errBuilder.build(); + LOG.debug("peer {} sent connection negotiation error: {}", + ctx.channel().remoteAddress(), error.getMessage()); + + // The upstream code should handle the negotiation failure. + state = State.FINISHED; + ctx.pipeline().remove(this); + ctx.fireChannelRead(new Failure(error)); + return; + } + + RpcHeader.NegotiatePB response = parseSaslMsgResponse(msg); + // TODO: check that the message type matches the expected one in all + // of the below implementations. + switch (state) { + case AWAIT_NEGOTIATE: + handleNegotiateResponse(ctx, response); + break; + case AWAIT_SASL: + handleSaslMessage(ctx, response); + break; + case AWAIT_AUTHN_TOKEN_EXCHANGE: + handleAuthnTokenExchangeResponse(ctx, response); + break; + case AWAIT_JWT_EXCHANGE: + handleJwtExchangeResponse(ctx, response); + break; + case AWAIT_TLS_HANDSHAKE: + handleTlsMessage(ctx, response); + break; + default: + throw new IllegalStateException("received a message in unexpected state: " + + state.toString()); + } + } + + private void handleSaslMessage(ChannelHandlerContext ctx, NegotiatePB response) + throws IOException { + switch (response.getStep()) { + case SASL_CHALLENGE: + handleChallengeResponse(ctx, response); + break; + case SASL_SUCCESS: + handleSuccessResponse(ctx, response); + break; + default: + throw new IllegalStateException("Wrong negotiation step: " + + response.getStep()); + } + } + + + private RpcHeader.NegotiatePB parseSaslMsgResponse(CallResponse response) { + RpcHeader.ResponseHeader responseHeader = response.getHeader(); + int id = responseHeader.getCallId(); + if (id != SASL_CALL_ID) { + throw new IllegalStateException("Received a call that wasn't for SASL"); + } + + RpcHeader.NegotiatePB.Builder saslBuilder = RpcHeader.NegotiatePB.newBuilder(); + KuduRpc.readProtobuf(response.getPBMessage(), saslBuilder); + return saslBuilder.build(); + } + + private void handleNegotiateResponse(ChannelHandlerContext ctx, + RpcHeader.NegotiatePB response) throws IOException { + Preconditions.checkState(response.getStep() == NegotiateStep.NEGOTIATE, + "Expected NEGOTIATE message, got {}", response.getStep()); + + // Store the supported features advertised by the server. + serverFeatures = getFeatureFlags(response); + // If the server supports TLS, we will always speak TLS to it. + final boolean negotiatedTls = serverFeatures.contains(RpcFeatureFlag.TLS); + if (!negotiatedTls && requireEncryption) { + throw new NonRecoverableException(Status.NotAuthorized( + "server does not support required TLS encryption")); + } + + // Check the negotiated authentication type sent by the server. 
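+    // chooseAuthenticationType() verifies that the server replied with at
+    // most one authentication type and falls back to SASL for compatibility
+    // with older servers that don't send one.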
+ chosenAuthnType = chooseAuthenticationType(response); + + if (chosenAuthnType == AuthenticationTypePB.TypeCase.SASL) { + chooseAndInitializeSaslMech(response); + } + + // If we negotiated TLS, then we want to start the TLS handshake; otherwise, + // we can move directly to the authentication phase. + if (negotiatedTls) { + startTlsHandshake(ctx); + } else { + startAuthentication(ctx); + } + } + + /** + * Determine whether the given channel is a loopback connection (i.e. the server + * and client are on the same host). + */ + private boolean isLoopbackConnection(Channel channel) { + if (overrideLoopbackForTests) { + return true; + } + try { + InetAddress local = ((InetSocketAddress)channel.localAddress()).getAddress(); + InetAddress remote = ((InetSocketAddress)channel.remoteAddress()).getAddress(); + return local.equals(remote); + } catch (ClassCastException cce) { + // In the off chance that we have some other type of local/remote address, + // we'll just assume it's not loopback. + return false; + } + } + + private void chooseAndInitializeSaslMech(NegotiatePB response) throws KuduException { + securityContext.refreshSubject(); + // Gather the set of server-supported mechanisms. + Map errorsByMech = Maps.newHashMap(); + Set serverMechs = Sets.newHashSet(); + for (RpcHeader.NegotiatePB.SaslMechanism mech : response.getSaslMechanismsList()) { + switch (mech.getMechanism().toUpperCase(Locale.ENGLISH)) { + case "GSSAPI": + serverMechs.add(SaslMechanism.GSSAPI); + break; + case "PLAIN": + serverMechs.add(SaslMechanism.PLAIN); + break; + default: + errorsByMech.put(mech.getMechanism(), "unrecognized mechanism"); + break; + } + } + + // For each of our own mechanisms, in descending priority, check if + // the server also supports them. If so, try to initialize saslClient. + // If we find a common mechanism that also can be successfully initialized, + // choose that mech. + for (SaslMechanism clientMech : SaslMechanism.values()) { + + if (clientMech.equals(SaslMechanism.GSSAPI)) { + Subject s = securityContext.getSubject(); + if (s == null || + s.getPrivateCredentials(KerberosTicket.class).isEmpty()) { + errorsByMech.put(clientMech.name(), "client does not have Kerberos credentials (tgt)"); + continue; + } + if (SecurityUtil.isTgtExpired(s)) { + errorsByMech.put(clientMech.name(), "client Kerberos credentials (TGT) have expired"); + continue; + } + } + + if (!serverMechs.contains(clientMech)) { + errorsByMech.put(clientMech.name(), "not advertised by server"); + continue; + } + Map props = Maps.newHashMap(); + // If the negotiated mechanism is GSSAPI (Kerberos), configure SASL to use + // integrity protection so that the channel bindings and nonce can be + // verified. 
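+      // The "auth-int" QOP requests authentication with integrity protection,
+      // which is what later allows unwrapping and verifying the
+      // server-provided channel bindings and nonce.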
+ if (clientMech == SaslMechanism.GSSAPI) { + props.put(Sasl.QOP, "auth-int"); + } + + try { + saslClient = Sasl.createSaslClient(new String[]{ clientMech.name() }, + null, + saslProtocolName, + remoteHostname, + props, + saslCallback); + chosenMech = clientMech; + break; + } catch (SaslException e) { + errorsByMech.put(clientMech.name(), e.getMessage()); + } + } + + if (chosenMech != null) { + LOG.debug("SASL mechanism {} chosen for peer {}", chosenMech.name(), remoteHostname); + if (chosenMech.equals(SaslMechanism.PLAIN) && requireAuthentication) { + String message = "client requires authentication, " + + "but server does not have Kerberos enabled"; + throw new NonRecoverableException(Status.NotAuthorized(message)); + } + return; + } + + // TODO(KUDU-1948): when the Java client has an option to require security, detect the case + // where the server is configured without Kerberos and the client requires it. + String message; + if (serverMechs.size() == 1 && serverMechs.contains(SaslMechanism.GSSAPI)) { + // Give a better error diagnostic for common case of an unauthenticated client connecting + // to a secure server. + message = "server requires authentication, but " + + errorsByMech.get(SaslMechanism.GSSAPI.name()); + } else { + message = "client/server supported SASL mechanism mismatch: [" + + Joiner.on(", ").withKeyValueSeparator(": ").join(errorsByMech) + "]"; + } + + if (authnTokenNotUsedReason != null) { + message += ". Authentication tokens were not used because " + + authnTokenNotUsedReason.msg; + } + + // If client has valid secondary authn credentials (such as authn token), + // but it does not have primary authn credentials (such as Kerberos creds), + // throw a recoverable exception. So that the request can be retried as long + // as the original call hasn't timed out, for cases documented in KUDU-2267, + // e.g. masters are in the process of the very first leader election after + // started up and does not have CA signed cert. + if (authnToken != null) { + throw new RecoverableException(Status.NotAuthorized(message)); + } else { + throw new NonRecoverableException(Status.NotAuthorized(message)); + } + } + + private AuthenticationTypePB.TypeCase chooseAuthenticationType(NegotiatePB response) { + Preconditions.checkArgument(response.getAuthnTypesCount() <= 1, + "Expected server to reply with at most one authn type"); + + if (response.getAuthnTypesCount() == 0) { + // Default to SASL for compatibility with old servers. + return AuthenticationTypePB.TypeCase.SASL; + } + + AuthenticationTypePB.TypeCase type = response.getAuthnTypes(0).getTypeCase(); + switch (type) { + case SASL: + if (authnToken != null) { + authnTokenNotUsedReason = AuthnTokenNotUsedReason.NOT_CHOSEN_BY_SERVER; + } + break; + case TOKEN: + if (authnToken == null) { + // TODO(todd): should we also check whether we have a CA cert? 
+ // it seems like this should have the same logic as whether we advertised it + throw new IllegalArgumentException("server chose token authentication " + + "but client had no valid token"); + } + break; + case JWT: + if (jsonWebToken == null) { + throw new IllegalArgumentException("server chose JWT authentication " + + "but client had no valid JWT"); + } + break; + default: + throw new IllegalArgumentException("server chose bad authn type " + chosenAuthnType); + } + return type; + } + + private Set getFeatureFlags(NegotiatePB response) { + ImmutableSet.Builder features = ImmutableSet.builder(); + for (RpcHeader.RpcFeatureFlag feature : response.getSupportedFeaturesList()) { + if (feature != RpcFeatureFlag.UNKNOWN) { + features.add(feature); + } + } + return features.build(); + } + + /** + * Send the initial TLS "ClientHello" message. + */ + private void startTlsHandshake(ChannelHandlerContext ctx) throws SSLException { + SSLEngine engine; + switch (chosenAuthnType) { + case SASL: + engine = securityContext.createSSLEngineTrustAll(); + break; + case TOKEN: + case JWT: + engine = securityContext.createSSLEngine(); + break; + default: + throw new AssertionError("unreachable"); + } + engine.setUseClientMode(true); + + // Set the preferred cipher suites. + { + Set supported = Sets.newHashSet(engine.getSupportedCipherSuites()); + List toEnable = Lists.newArrayList(); + for (String c: PREFERRED_CIPHER_SUITES) { + if (supported.contains(c)) { + toEnable.add(c); + } + } + if (toEnable.isEmpty()) { + // This should never be the case given the cipher suites we picked are + // supported by the standard JDK, but just in case, better to have a clear + // exception. + throw new RuntimeException("found no preferred cipher suite among supported: " + + Joiner.on(',').join(supported)); + } + engine.setEnabledCipherSuites(toEnable.toArray(new String[0])); + } + + // Enable preferred TLS protocols, if supported. This is to match the set + // of TLS protocols supported by Kudu servers: no other protocols need to + // be enabled. In addition, this is to enable TLSv1.3 in Java 8. The latest + // builds of OpenJDK 8 and Oracle JDK 8 support TLSv1.3, but TLSv1.3 is not + // enabled by default for SSLEngine. + // For example, see Oracle JDK 8u261 update release notes at + // https://www.oracle.com/java/technologies/javase/8u261-relnotes.html + // TLSv1.3 is enabled by default in Java 11, at least with OpenJDK. + { + Set supported = Sets.newHashSet(engine.getSupportedProtocols()); + List toEnable = Lists.newArrayList(); + for (String p : PREFERRED_PROTOCOLS) { + if (supported.contains(p)) { + toEnable.add(p); + } + } + if (toEnable.isEmpty()) { + // This should never be the case given that at least one preferred TLS + // protocol (TLSv1) is supported by the standard JDK. It's better to + // have a clear exception, just in case. + throw new RuntimeException("found no preferred TLS protocol among supported: " + + Joiner.on(',').join(supported)); + } + engine.setEnabledProtocols(toEnable.toArray(new String[0])); + } + + // TODO(aserbin): maybe, check that at least one cipher is enabled per each + // enabled protocol? + + SharableSslHandler handler = new SharableSslHandler(engine); + + sslEmbedder = new EmbeddedChannel(handler); + sslHandshakeFuture = handler.handshakeFuture(); + state = State.AWAIT_TLS_HANDSHAKE; + boolean sent = sendPendingOutboundTls(ctx); + assert sent; + } + + /** + * Handle an inbound message during the TLS handshake. 
If this message + * causes the handshake to complete, triggers the beginning of SASL initiation. + */ + private void handleTlsMessage(ChannelHandlerContext ctx, NegotiatePB response) + throws IOException { + Preconditions.checkState(response.getStep() == NegotiateStep.TLS_HANDSHAKE); + Preconditions.checkArgument(!response.getTlsHandshake().isEmpty(), + "empty TLS message from server"); + + // Pass the TLS message into our embedded SslHandler. + sslEmbedder.writeInbound(Unpooled.copiedBuffer( + response.getTlsHandshake().asReadOnlyByteBuffer())); + sslEmbedder.flush(); + if (sendPendingOutboundTls(ctx)) { + // Data was sent -- we must continue the handshake process. + return; + } + + // The handshake completed. + // Insert the SSL handler into the pipeline so that all following traffic + // gets encrypted, and then move on to the SASL portion of negotiation. + // + // NOTE: this takes effect immediately (i.e. the following SASL initiation + // sequence is encrypted). + SharableSslHandler handler = (SharableSslHandler) sslEmbedder.pipeline().first(); + handler.resetAdded(); + Certificate[] certs = handler.engine().getSession().getPeerCertificates(); + if (certs.length == 0) { + throw new SSLPeerUnverifiedException("no peer cert found"); + } + + // The first element of the array is the peer's own certificate. + peerCert = certs[0]; + + // Don't wrap the TLS socket if we are using TLS for authentication only. + boolean isAuthOnly = serverFeatures.contains(RpcFeatureFlag.TLS_AUTHENTICATION_ONLY) && + isLoopbackConnection(ctx.channel()) && !encryptLoopback; + if (!isAuthOnly) { + ctx.pipeline().addFirst("tls", handler); + } + startAuthentication(ctx); + } + + /** + * If the embedded SslHandler has data to send outbound, gather + * it all, send it tunneled in a NegotiatePB message, and return true. + * + * Otherwise, indicates that the handshake is complete by returning false. + */ + private boolean sendPendingOutboundTls(ChannelHandlerContext ctx) { + // The SslHandler can generate multiple TLS messages in response + // (e.g. ClientKeyExchange, ChangeCipherSpec, ClientFinished). + // We poll the handler until it stops giving us buffers. + List bufs = Lists.newArrayList(); + while (!sslEmbedder.outboundMessages().isEmpty()) { + ByteBuf msg = sslEmbedder.readOutbound(); + bufs.add(ByteString.copyFrom(msg.nioBuffer())); + // Release the reference counted ByteBuf to avoid leaks now that we are done with it. + // https://netty.io/wiki/reference-counted-objects.html + msg.release(); + } + ByteString data = ByteString.copyFrom(bufs); + if (sslHandshakeFuture.isDone()) { + // TODO(todd): should check sslHandshakeFuture.isSuccess() + if (!data.isEmpty()) { + // This is a case of TLSv1.3 protocol. + sendTunneledTls(ctx, data); + } + return false; + } else { + assert data.size() > 0; + sendTunneledTls(ctx, data); + return true; + } + } + + /** + * Send a buffer of data for the TLS handshake, encapsulated in the + * appropriate TLS_HANDSHAKE negotiation message. 
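+   * Like every other negotiation message, it is sent with the reserved SASL
+   * call ID rather than a regular call ID.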
+ */ + private void sendTunneledTls(ChannelHandlerContext ctx, ByteString buf) { + sendSaslMessage(ctx, NegotiatePB.newBuilder() + .setStep(NegotiateStep.TLS_HANDSHAKE) + .setTlsHandshake(buf) + .build()); + } + + private void startAuthentication(ChannelHandlerContext ctx) + throws SaslException, NonRecoverableException { + switch (chosenAuthnType) { + case SASL: + sendSaslInitiate(ctx); + break; + case TOKEN: + sendTokenExchange(ctx); + break; + case JWT: + sendJwtExchange(ctx); + break; + default: + throw new AssertionError("unreachable"); + } + } + + private void sendTokenExchange(ChannelHandlerContext ctx) { + // We must not send authn token unless we have successfully finished + // authenticating via TLS. + Preconditions.checkNotNull(authnToken); + Preconditions.checkNotNull(sslHandshakeFuture); + Preconditions.checkState(sslHandshakeFuture.isSuccess()); + + RpcHeader.NegotiatePB.Builder builder = RpcHeader.NegotiatePB.newBuilder() + .setStep(NegotiateStep.TOKEN_EXCHANGE) + .setAuthnToken(authnToken); + state = State.AWAIT_AUTHN_TOKEN_EXCHANGE; + sendSaslMessage(ctx, builder.build()); + } + + private void sendJwtExchange(ChannelHandlerContext ctx) { + // We must not send JWT unless we have successfully finished + // authenticating via TLS. + Preconditions.checkNotNull(jsonWebToken); + Preconditions.checkNotNull(sslHandshakeFuture); + Preconditions.checkState(sslHandshakeFuture.isSuccess()); + + RpcHeader.NegotiatePB.Builder builder = RpcHeader.NegotiatePB.newBuilder() + .setStep(NegotiateStep.JWT_EXCHANGE) + .setJwtRaw(jsonWebToken); + state = State.AWAIT_JWT_EXCHANGE; + sendSaslMessage(ctx, builder.build()); + } + + private void handleAuthnTokenExchangeResponse(ChannelHandlerContext ctx, NegotiatePB response) + throws SaslException { + Preconditions.checkArgument(response.getStep() == NegotiateStep.TOKEN_EXCHANGE, + "expected TOKEN_EXCHANGE, got step: {}", response.getStep()); + + // The authn token response doesn't have any actual data in it, so we can just move on. + finish(ctx); + } + + private void handleJwtExchangeResponse(ChannelHandlerContext ctx, NegotiatePB response) + throws SaslException { + Preconditions.checkArgument(response.getStep() == NegotiateStep.JWT_EXCHANGE, + "expected JWT_EXCHANGE, got step: {}", response.getStep()); + + // The JWT response doesn't have any actual data in it, so we can just move on. 
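+    // finish() removes this handler from the pipeline, sends the connection
+    // context, and fires a Success event for the upstream handlers.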
+ finish(ctx); + } + + private void sendSaslInitiate(ChannelHandlerContext ctx) + throws SaslException, NonRecoverableException { + RpcHeader.NegotiatePB.Builder builder = RpcHeader.NegotiatePB.newBuilder(); + if (saslClient.hasInitialResponse()) { + byte[] initialResponse = evaluateChallenge(new byte[0]); + builder.setToken(UnsafeByteOperations.unsafeWrap(initialResponse)); + } + builder.setStep(RpcHeader.NegotiatePB.NegotiateStep.SASL_INITIATE); + builder.addSaslMechanismsBuilder().setMechanism(chosenMech.name()); + state = State.AWAIT_SASL; + sendSaslMessage(ctx, builder.build()); + } + + private void handleChallengeResponse(ChannelHandlerContext ctx, RpcHeader.NegotiatePB response) + throws SaslException, NonRecoverableException { + byte[] saslToken = evaluateChallenge(response.getToken().toByteArray()); + if (saslToken == null) { + throw new IllegalStateException("Not expecting an empty token"); + } + RpcHeader.NegotiatePB.Builder builder = RpcHeader.NegotiatePB.newBuilder(); + builder.setToken(UnsafeByteOperations.unsafeWrap(saslToken)); + builder.setStep(RpcHeader.NegotiatePB.NegotiateStep.SASL_RESPONSE); + sendSaslMessage(ctx, builder.build()); + } + + /** + * Verify the channel bindings included in 'response'. This is used only + * for GSSAPI-authenticated connections over TLS. + * @throws SSLPeerUnverifiedException on failure to verify + */ + private void verifyChannelBindings(NegotiatePB response) throws IOException { + byte[] expected = SecurityUtil.getEndpointChannelBindings(peerCert); + if (!response.hasChannelBindings()) { + throw new SSLPeerUnverifiedException("no channel bindings provided by remote peer"); + } + byte[] provided = response.getChannelBindings().toByteArray(); + // NOTE: the C SASL library's implementation of sasl_encode() actually + // includes a length prefix. Java's equivalents do not. So, we have to + // chop off the length prefix here before unwrapping. + if (provided.length < 4) { + throw new SSLPeerUnverifiedException("invalid too-short channel bindings"); + } + byte[] unwrapped = saslClient.unwrap(provided, 4, provided.length - 4); + if (!Bytes.equals(expected, unwrapped)) { + throw new SSLPeerUnverifiedException("invalid channel bindings provided by remote peer"); + } + } + + private void handleSuccessResponse(ChannelHandlerContext ctx, NegotiatePB response) + throws IOException { + Preconditions.checkState(saslClient.isComplete(), + "server sent SASL_SUCCESS step, but SASL negotiation is not complete"); + if (chosenMech == SaslMechanism.GSSAPI) { + if (response.hasNonce()) { + // Grab the nonce from the server, if it has sent one. We'll send it back + // later with SASL integrity protection as part of the connection context. + nonce = response.getNonce().toByteArray(); + } + + if (peerCert != null) { + // Check the channel bindings provided by the server against the expected channel bindings. + verifyChannelBindings(response); + } + } + + finish(ctx); + } + + /** + * Marks the negotiation as finished, and sends the connection context to the server. 
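Both the channel-bindings check above and the nonce that is echoed back in the connection context depend on the same framing detail: the C SASL library's sasl_encode() output carries a 4-byte big-endian length prefix, while Java's SaslClient.wrap()/unwrap() do not. An illustrative helper, not part of the client, for adding and stripping that prefix:

[source,java]
----
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;

// Illustrative only: the 4-byte big-endian length framing assumed by the C SASL library.
final class SaslFramingSketch {
  /** Prepend the length header before handing SASL-wrapped bytes to the peer. */
  static byte[] addLengthHeader(byte[] wrapped) {
    ByteBuffer buf = ByteBuffer.allocate(wrapped.length + 4).order(ByteOrder.BIG_ENDIAN);
    buf.putInt(wrapped.length);
    buf.put(wrapped);
    return buf.array();
  }

  /** Strip the length header from framed data before calling SaslClient.unwrap(). */
  static byte[] stripLengthHeader(byte[] framed) {
    if (framed.length < 4) {
      throw new IllegalArgumentException("too short to hold a length header");
    }
    return Arrays.copyOfRange(framed, 4, framed.length);
  }
}
----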
+ * @param ctx the connection context + */ + @SuppressWarnings("FutureReturnValueIgnored") + private void finish(ChannelHandlerContext ctx) throws SaslException { + state = State.FINISHED; + ctx.pipeline().remove(this); + + ctx.writeAndFlush(makeConnectionContext(), ctx.voidPromise()); + LOG.debug("Authenticated connection {} using {}/{}", + ctx.channel(), chosenAuthnType, chosenMech); + ctx.fireChannelRead(new Success(serverFeatures)); + } + + private RpcOutboundMessage makeConnectionContext() throws SaslException { + RpcHeader.ConnectionContextPB.Builder builder = RpcHeader.ConnectionContextPB.newBuilder(); + + // The UserInformationPB is deprecated, but used by servers prior to Kudu 1.1. + RpcHeader.UserInformationPB.Builder userBuilder = RpcHeader.UserInformationPB.newBuilder(); + String user = securityContext.getRealUser(); + userBuilder.setEffectiveUser(user); + userBuilder.setRealUser(user); + builder.setDEPRECATEDUserInfo(userBuilder.build()); + + if (nonce != null) { + // Reply with the SASL-protected nonce. We only set the nonce when using SASL GSSAPI. + // The Java SASL client does not automatically add the length header, + // so we have to do it ourselves. + byte[] encodedNonce = saslClient.wrap(nonce, 0, nonce.length); + ByteBuffer buf = ByteBuffer.allocate(encodedNonce.length + 4); + buf.order(ByteOrder.BIG_ENDIAN); + buf.putInt(encodedNonce.length); + buf.put(encodedNonce); + builder.setEncodedNonce(UnsafeByteOperations.unsafeWrap(buf.array())); + } + + RpcHeader.ConnectionContextPB pb = builder.build(); + RpcHeader.RequestHeader.Builder header = + RpcHeader.RequestHeader.newBuilder().setCallId(CONNECTION_CTX_CALL_ID); + return new RpcOutboundMessage(header, pb); + } + + private byte[] evaluateChallenge(final byte[] challenge) + throws SaslException, NonRecoverableException { + try { + return Subject.doAs(securityContext.getSubject(), + new PrivilegedExceptionAction() { + @Override + public byte[] run() throws SaslException { + return saslClient.evaluateChallenge(challenge); + } + }); + } catch (PrivilegedActionException e) { + // This cast is safe because the action above only throws checked SaslException. + SaslException saslException = (SaslException) e.getCause(); + + // TODO(KUDU-2121): We should never get to this point if the client does not have + // Kerberos credentials, but it seems that on certain platforms it can happen. + // So, we try and determine whether the evaluateChallenge failed due to missing + // credentials, and return a nicer error message if so. + Throwable cause = saslException.getCause(); + if (cause instanceof GSSException && + ((GSSException) cause).getMajor() == GSSException.NO_CRED) { + throw new NonRecoverableException( + Status.ConfigurationError( + "Server requires Kerberos, but this client is not authenticated " + + "(missing or expired TGT)"), + saslException); + } + throw saslException; + } + } + + private class SaslClientCallbackHandler implements CallbackHandler { + @Override + public void handle(Callback[] callbacks) throws UnsupportedCallbackException { + for (Callback callback : callbacks) { + if (callback instanceof NameCallback) { + ((NameCallback) callback).setName(securityContext.getRealUser()); + } else if (callback instanceof PasswordCallback) { + ((PasswordCallback) callback).setPassword(new char[0]); + } else { + throw new UnsupportedCallbackException(callback, + "Unrecognized SASL client callback"); + } + } + } + } + + /** + * The results of a successful negotiation. 
This is sent to upstream handlers in the + * Netty pipeline after negotiation completes. + */ + static class Success { + final Set serverFeatures; + + public Success(Set serverFeatures) { + this.serverFeatures = serverFeatures; + } + } + + /** + * The results of a failed negotiation. This is sent to upstream handlers in the Netty pipeline + * when a negotiation fails. + */ + static class Failure { + /** The RPC error received from the server. */ + final RpcHeader.ErrorStatusPB status; + + public Failure(RpcHeader.ErrorStatusPB status) { + this.status = status; + } + } + + /** + * A hack to allow sharing the SslHandler even though it's not annotated as "Sharable". + * We aren't technically sharing it, but when we move it from the EmbeddedChannel to + * the actual channel above the sharing validation runs and throws an exception. + * + * https://netty.io/wiki/new-and-noteworthy-in-4.0.html#well-defined-thread-model + * https://netty.io/4.0/api/io/netty/channel/ChannelHandler.Sharable.html + * + * TODO (ghenke): Remove the need for this reflection. + */ + static class SharableSslHandler extends SslHandler { + + public SharableSslHandler(SSLEngine engine) { + super(engine); + } + + void resetAdded() { + Field addedField = AccessController.doPrivileged((PrivilegedAction) () -> { + try { + Class c = ChannelHandlerAdapter.class; + Field added = c.getDeclaredField("added"); + added.setAccessible(true); + return added; + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } + }); + try { + addedField.setBoolean(this, false); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NoLeaderFoundException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NoLeaderFoundException.java new file mode 100644 index 0000000000..eadbbbf613 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NoLeaderFoundException.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Indicates that the request failed because we couldn't find a leader. It is retried as long + * as the original call hasn't timed out. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +final class NoLeaderFoundException extends RecoverableException { + + NoLeaderFoundException(Status status) { + super(status); + } + + NoLeaderFoundException(Status status, Exception cause) { + super(status, cause); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NonCoveredRangeException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NonCoveredRangeException.java new file mode 100644 index 0000000000..8d7640bdf4 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NonCoveredRangeException.java @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Exception indicating that an operation attempted to access a non-covered range partition. + */ +@InterfaceAudience.Private +public class NonCoveredRangeException extends NonRecoverableException { + private final byte[] nonCoveredRangeStart; + private final byte[] nonCoveredRangeEnd; + + public NonCoveredRangeException(byte[] nonCoveredRangeStart, byte[] nonCoveredRangeEnd) { + super(Status.NotFound(getMessage(nonCoveredRangeStart, nonCoveredRangeEnd))); + this.nonCoveredRangeStart = nonCoveredRangeStart; + this.nonCoveredRangeEnd = nonCoveredRangeEnd; + } + + private static String getMessage(byte[] rangeStart, byte[] rangeEnd) { + return String.format("accessed range partition ([%s, %s)) does not exist in table", + rangeStart.length == 0 ? "" : Bytes.hex(rangeStart), + rangeEnd.length == 0 ? "" : Bytes.hex(rangeEnd)); + } + + @Override + public String getMessage() { + return getMessage(nonCoveredRangeStart, nonCoveredRangeEnd); + } + + byte[] getNonCoveredRangeStart() { + return nonCoveredRangeStart; + } + + byte[] getNonCoveredRangeEnd() { + return nonCoveredRangeEnd; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NonRecoverableException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NonRecoverableException.java new file mode 100644 index 0000000000..68c17f8990 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/NonRecoverableException.java @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +@SuppressWarnings("serial") +class NonRecoverableException extends KuduException { + + /** + * Constructor. + * @param status status object containing the reason for the exception + * trace. + */ + NonRecoverableException(Status status) { + super(status); + } + + /** + * Constructor. + * @param status status object containing the reason for the exception + * @param cause The exception that caused this one to be thrown. + */ + NonRecoverableException(Status status, Throwable cause) { + super(status, cause); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Operation.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Operation.java new file mode 100644 index 0000000000..81d7b0efe4 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Operation.java @@ -0,0 +1,638 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
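NoLeaderFoundException and NonCoveredRangeException above both ultimately extend KuduException, which exposes the failure as a Status. A hedged sketch of how calling code might branch on that status rather than on the concrete exception class; the KuduSession and Insert arguments are assumed to be created elsewhere.

[source,java]
----
import org.apache.kudu.client.Insert;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduSession;

// Illustrative sketch: inspect the Status carried by a KuduException.
final class StatusHandlingSketch {
  static void applyOrReport(KuduSession session, Insert insert) throws KuduException {
    try {
      session.apply(insert);
    } catch (KuduException e) {
      if (e.getStatus().isNotFound()) {
        // The server or client reported a NotFound status; log it and move on.
        System.err.println("not found: " + e.getStatus());
      } else {
        throw e;
      }
    }
  }
}
----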
+ +package org.apache.kudu.client; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.EnumSet; +import java.util.List; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.ByteString; +import com.google.protobuf.Message; +import com.google.protobuf.UnsafeByteOperations; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.RowOperations.RowOperationsPB; +import org.apache.kudu.Schema; +import org.apache.kudu.WireProtocol.AppStatusPB.ErrorCode; +import org.apache.kudu.client.ProtobufHelper.SchemaPBConversionFlags; +import org.apache.kudu.client.Statistics.Statistic; +import org.apache.kudu.client.Statistics.TabletStatistics; +import org.apache.kudu.security.Token; +import org.apache.kudu.tserver.Tserver; +import org.apache.kudu.util.Pair; + +/** + * Base class for the RPCs that related to WriteRequestPB. It contains almost all the logic + * and knows how to serialize its child classes. + * + * TODO(todd): this should not extend KuduRpc. Rather, we should make single-operation writes + * just use a Batch instance with a single operation in it. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class Operation extends KuduRpc { + /** + * This size will be set when serialize is called. It stands for the size of the row in this + * operation. + */ + private long rowOperationSizeBytes = 0; + + enum ChangeType { + INSERT((byte)RowOperationsPB.Type.INSERT.getNumber()), + UPDATE((byte)RowOperationsPB.Type.UPDATE.getNumber()), + DELETE((byte)RowOperationsPB.Type.DELETE.getNumber()), + SPLIT_ROWS((byte)RowOperationsPB.Type.SPLIT_ROW.getNumber()), + UPSERT((byte)RowOperationsPB.Type.UPSERT.getNumber()), + RANGE_LOWER_BOUND((byte) RowOperationsPB.Type.RANGE_LOWER_BOUND.getNumber()), + RANGE_UPPER_BOUND((byte) RowOperationsPB.Type.RANGE_UPPER_BOUND.getNumber()), + EXCLUSIVE_RANGE_LOWER_BOUND( + (byte) RowOperationsPB.Type.EXCLUSIVE_RANGE_LOWER_BOUND.getNumber()), + INCLUSIVE_RANGE_UPPER_BOUND( + (byte) RowOperationsPB.Type.INCLUSIVE_RANGE_UPPER_BOUND.getNumber()), + INSERT_IGNORE((byte) RowOperationsPB.Type.INSERT_IGNORE.getNumber()), + UPDATE_IGNORE((byte) RowOperationsPB.Type.UPDATE_IGNORE.getNumber()), + DELETE_IGNORE((byte) RowOperationsPB.Type.DELETE_IGNORE.getNumber()), + UPSERT_IGNORE((byte) RowOperationsPB.Type.UPSERT_IGNORE.getNumber()); + + ChangeType(byte encodedByte) { + this.encodedByte = encodedByte; + } + + byte toEncodedByte() { + return encodedByte; + } + + /** The byte used to encode this in a RowOperationsPB */ + private final byte encodedByte; + } + + static final String METHOD = "Write"; + + private PartialRow row; + + private Token.SignedTokenPB authzToken; + + /** See {@link SessionConfiguration#setIgnoreAllDuplicateRows(boolean)} */ + boolean ignoreAllDuplicateRows = false; + /** See {@link SessionConfiguration#setIgnoreAllNotFoundRows(boolean)} */ + boolean ignoreAllNotFoundRows = false; + /** + * Transaction identifier for the generated WriteRequestPB. Applicable only + * if set to a valid value. + */ + long txnId = AsyncKuduClient.INVALID_TXN_ID; + + /** + * Package-private constructor. 
Subclasses need to be instantiated via AsyncKuduSession + * @param table table with the schema to use for this operation + */ + Operation(KuduTable table) { + super(table, null, 0); + this.row = table.getSchema().newPartialRow(); + } + + /** + * Reset the timeout of this batch. + * + * TODO(wdberkeley): The fact we have to do this is a sign an Operation should not subclass + * KuduRpc. + * + * @param timeoutMillis the new timeout of the batch in milliseconds + */ + void resetTimeoutMillis(Timer timer, long timeoutMillis) { + timeoutTracker.reset(); + timeoutTracker.setTimeout(timeoutMillis); + if (timeoutTask != null) { + timeoutTask.cancel(); + } + timeoutTask = AsyncKuduClient.newTimeout(timer, new RpcTimeoutTask(), timeoutMillis); + } + + /** See {@link SessionConfiguration#setIgnoreAllDuplicateRows(boolean)} */ + void setIgnoreAllDuplicateRows(boolean ignoreAllDuplicateRows) { + this.ignoreAllDuplicateRows = ignoreAllDuplicateRows; + } + + /** See {@link SessionConfiguration#setIgnoreAllNotFoundRows(boolean)} */ + void setIgnoreAllNotFoundRows(boolean ignoreAllNotFoundRows) { + this.ignoreAllNotFoundRows = ignoreAllNotFoundRows; + } + + /** + * Set transaction identifier for this operation. If set, the transaction + * identifier is propagated into the generated WriteRequestPB. + * + * @param txnId transaction identifier to set + */ + void setTxnId(long txnId) { + this.txnId = txnId; + } + + /** + * Classes extending Operation need to have a specific ChangeType + * @return Operation's ChangeType + */ + abstract ChangeType getChangeType(); + + /** + * Returns the size in bytes of this operation's row after serialization. + * @return size in bytes + * @throws IllegalStateException thrown if this RPC hasn't been serialized eg sent to a TS + */ + long getRowOperationSizeBytes() { + if (this.rowOperationSizeBytes == 0) { + throw new IllegalStateException("This row hasn't been serialized yet"); + } + return this.rowOperationSizeBytes; + } + + @Override + String serviceName() { + return TABLET_SERVER_SERVICE_NAME; + } + + @Override + String method() { + return METHOD; + } + + @Override + boolean needsAuthzToken() { + return true; + } + + @Override + void bindAuthzToken(Token.SignedTokenPB token) { + authzToken = token; + } + + @Override + Message createRequestPB() { + final Tserver.WriteRequestPB.Builder builder = + createAndFillWriteRequestPB(ImmutableList.of(this)); + this.rowOperationSizeBytes = (long)builder.getRowOperations().getRows().size() + + (long)builder.getRowOperations().getIndirectData().size(); + builder.setTabletId(UnsafeByteOperations.unsafeWrap(getTablet().getTabletIdAsBytes())); + builder.setExternalConsistencyMode(this.externalConsistencyMode.pbVersion()); + if (this.propagatedTimestamp != AsyncKuduClient.NO_TIMESTAMP) { + builder.setPropagatedTimestamp(this.propagatedTimestamp); + } + if (authzToken != null) { + builder.setAuthzToken(authzToken); + } + if (this.txnId != AsyncKuduClient.INVALID_TXN_ID) { + builder.setTxnId(this.txnId); + } + return builder.build(); + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + Tserver.WriteResponsePB.Builder builder = Tserver.WriteResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + Tserver.WriteResponsePB.PerRowErrorPB error = null; + if (builder.getPerRowErrorsCount() != 0) { + error = builder.getPerRowErrors(0); + ErrorCode errorCode = error.getError().getCode(); + if ((ignoreAllDuplicateRows && errorCode == ErrorCode.ALREADY_PRESENT) || + 
(ignoreAllNotFoundRows && errorCode == ErrorCode.NOT_FOUND)) { + error = null; + } + } + Tserver.ResourceMetricsPB metricsPB = builder.hasResourceMetrics() ? + builder.getResourceMetrics() : null; + OperationResponse response = new OperationResponse(timeoutTracker.getElapsedMillis(), + tsUUID, + builder.getTimestamp(), + this, + error, + metricsPB); + return new Pair<>(response, builder.hasError() ? builder.getError() : null); + } + + @Override + public byte[] partitionKey() { + return this.getTable().getPartitionSchema().encodePartitionKey(row); + } + + @Override + boolean isRequestTracked() { + return true; + } + + /** + * Get the underlying row to modify. + * @return a partial row that will be sent with this Operation + */ + public PartialRow getRow() { + return this.row; + } + + /** + * Set the underlying row. + * + * Note: The schema of the underlying row and the table must be equal by reference. + * To ensure they are equal, create the partial row from the table's schema. + * + *

+   * <pre>
+   * {@code
+   *   KuduTable table = client.openTable("my-table");
+   *   PartialRow row = table.getSchema().newPartialRow();
+   *   ...
+   *   Operation op = table.newInsert();
+   *   op.setRow(row);
+   * }
+   * </pre>
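A slightly fuller, purely illustrative variant of the snippet above; the master address, table name and column names are made up, and the row is obtained from the operation itself rather than passed in through setRow.

[source,java]
----
import org.apache.kudu.client.Insert;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.OperationResponse;
import org.apache.kudu.client.PartialRow;

// Illustrative end-to-end insert; "master-host:7051", "my-table" and the
// column names are hypothetical.
final class InsertSketch {
  public static void main(String[] args) throws KuduException {
    KuduClient client = new KuduClient.KuduClientBuilder("master-host:7051").build();
    try {
      KuduTable table = client.openTable("my-table");
      Insert insert = table.newInsert();
      PartialRow row = insert.getRow();   // already bound to the table's schema
      row.addInt("key", 1);               // hypothetical INT32 key column
      row.addString("value", "hello");    // hypothetical STRING column
      KuduSession session = client.newSession();
      OperationResponse resp = session.apply(insert);
      if (resp.hasRowError()) {
        System.err.println(resp.getRowError());
      }
      session.close();
    } finally {
      client.close();
    }
  }
}
----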
+ * + * @param row the row to set + */ + public void setRow(PartialRow row) { + Preconditions.checkArgument(row.getSchema() == table.getSchema(), + "The row's schema must be equal by reference to the table schema"); + this.row = row; + } + + @Override + void updateStatistics(Statistics statistics, OperationResponse response) { + String tabletId = this.getTablet().getTabletId(); + String tableName = this.getTable().getName(); + TabletStatistics tabletStatistics = statistics.getTabletStatistics(tableName, tabletId); + if (response == null) { + tabletStatistics.incrementStatistic(Statistic.OPS_ERRORS, 1); + tabletStatistics.incrementStatistic(Statistic.RPC_ERRORS, 1); + return; + } + tabletStatistics.incrementStatistic(Statistic.WRITE_RPCS, 1); + if (response.hasRowError()) { + // If ignoreAllDuplicateRows is set, the already_present exception will be + // discarded and wont't be recorded here + tabletStatistics.incrementStatistic(Statistic.OPS_ERRORS, 1); + } else { + tabletStatistics.incrementStatistic(Statistic.WRITE_OPS, 1); + } + tabletStatistics.incrementStatistic(Statistic.BYTES_WRITTEN, getRowOperationSizeBytes()); + } + + /** + * Helper method that puts a list of Operations together into a WriteRequestPB. + * @param operations The list of ops to put together in a WriteRequestPB + * @return A fully constructed WriteRequestPB containing the passed rows, or + * null if no rows were passed. + */ + static Tserver.WriteRequestPB.Builder createAndFillWriteRequestPB(List operations) { + if (operations == null || operations.isEmpty()) { + return null; + } + Schema schema = operations.get(0).table.getSchema(); + RowOperationsPB rowOps = new OperationsEncoder().encodeOperations(operations); + if (rowOps == null) { + return null; + } + + Tserver.WriteRequestPB.Builder requestBuilder = Tserver.WriteRequestPB.newBuilder(); + requestBuilder.setSchema(ProtobufHelper.schemaToPb(schema, + EnumSet.of(SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_COMMENT, + SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_ID))); + requestBuilder.setRowOperations(rowOps); + return requestBuilder; + } + + static class OperationsEncoder { + private Schema schema; + private ByteBuffer rows; + // We're filling this list as we go through the operations in encodeRow() and at the same time + // compute the total size, which will be used to right-size the array in toPB(). + private List indirect; + private long indirectWrittenBytes; + + /** + * Initializes the state of the encoder based on the schema and number of operations to encode. + * + * @param schema the schema of the table which the operations belong to. + * @param numOperations the number of operations. + */ + private void init(Schema schema, int numOperations) { + this.schema = schema; + + // Set up the encoded data. + // Estimate a maximum size for the data. This is conservative, but avoids + // having to loop through all the operations twice. + final int columnBitSetSize = Bytes.getBitSetSize(schema.getColumnCount()); + int sizePerRow = 1 /* for the op type */ + schema.getRowSize() + columnBitSetSize; + if (schema.hasNullableColumns()) { + // nullsBitSet is the same size as the columnBitSet + sizePerRow += columnBitSetSize; + } + + // TODO: would be more efficient to use a buffer which "chains" smaller allocations + // instead of a doubling buffer like BAOS. 
+ this.rows = ByteBuffer.allocate(sizePerRow * numOperations) + .order(ByteOrder.LITTLE_ENDIAN); + this.indirect = new ArrayList<>(schema.getVarLengthColumnCount() * numOperations); + } + + /** + * Builds the row operations protobuf message with encoded operations. + * @return the row operations protobuf message. + */ + private RowOperationsPB toPB() { + RowOperationsPB.Builder rowOpsBuilder = RowOperationsPB.newBuilder(); + + // TODO: we could avoid a copy here by using an implementation that allows + // zero-copy on a slice of an array. + rows.limit(rows.position()); + rows.flip(); + rowOpsBuilder.setRows(ByteString.copyFrom(rows)); + if (indirect.size() > 0) { + // TODO: same as above, we could avoid a copy here by using an implementation that allows + // zero-copy on a slice of an array. + byte[] indirectData = new byte[(int)indirectWrittenBytes]; + int offset = 0; + for (ByteBuffer bb : indirect) { + int bbSize = bb.remaining(); + bb.get(indirectData, offset, bbSize); + offset += bbSize; + } + rowOpsBuilder.setIndirectData(UnsafeByteOperations.unsafeWrap(indirectData)); + } + return rowOpsBuilder.build(); + } + + private void encodeRow(PartialRow row, ChangeType type) { + int columnCount = row.getSchema().getColumnCount(); + BitSet columnsBitSet = row.getColumnsBitSet(); + BitSet nullsBitSet = row.getNullsBitSet(); + + // If this is a DELETE operation only the key columns should to be set. + if (type == ChangeType.DELETE || type == ChangeType.DELETE_IGNORE) { + columnCount = row.getSchema().getPrimaryKeyColumnCount(); + // Clear the bits indicating any non-key fields are set. + columnsBitSet.clear(schema.getPrimaryKeyColumnCount(), columnsBitSet.size()); + if (schema.hasNullableColumns()) { + nullsBitSet.clear(schema.getPrimaryKeyColumnCount(), nullsBitSet.size()); + } + } + + rows.put(type.toEncodedByte()); + rows.put(Bytes.fromBitSet(columnsBitSet, schema.getColumnCount())); + if (schema.hasNullableColumns()) { + rows.put(Bytes.fromBitSet(nullsBitSet, schema.getColumnCount())); + } + + byte[] rowData = row.getRowAlloc(); + int currentRowOffset = 0; + for (int colIdx = 0; colIdx < columnCount; colIdx++) { + ColumnSchema col = schema.getColumnByIndex(colIdx); + // Keys should always be specified, maybe check? + if (row.isSet(colIdx) && !row.isSetToNull(colIdx)) { + if (col.getType().isFixedSize()) { + rows.put(rowData, currentRowOffset, col.getTypeSize()); + } else { + ByteBuffer varLengthData = row.getVarLengthData().get(colIdx); + varLengthData.reset(); + rows.putLong(indirectWrittenBytes); + int bbSize = varLengthData.remaining(); + rows.putLong(bbSize); + indirect.add(varLengthData); + indirectWrittenBytes += bbSize; + } + } + currentRowOffset += col.getTypeSize(); + } + } + + public RowOperationsPB encodeOperations(List operations) { + if (operations == null || operations.isEmpty()) { + return null; + } + init(operations.get(0).table.getSchema(), operations.size()); + for (Operation operation : operations) { + encodeRow(operation.row, operation.getChangeType()); + } + return toPB(); + } + + public RowOperationsPB encodeRangePartitions( + List rangePartitions, + List splitRows) { + + if (splitRows.isEmpty() && rangePartitions.isEmpty()) { + return null; + } + + Schema schema = splitRows.isEmpty() ? 
rangePartitions.get(0).getLowerBound().getSchema() + : splitRows.get(0).getSchema(); + init(schema, splitRows.size() + 2 * rangePartitions.size()); + + for (PartialRow row : splitRows) { + encodeRow(row, ChangeType.SPLIT_ROWS); + } + + for (RangePartition partition : rangePartitions) { + encodeRow(partition.getLowerBound(), + partition.getLowerBoundType() == RangePartitionBound.INCLUSIVE_BOUND ? + ChangeType.RANGE_LOWER_BOUND : + ChangeType.EXCLUSIVE_RANGE_LOWER_BOUND); + encodeRow(partition.getUpperBound(), + partition.getUpperBoundType() == RangePartitionBound.EXCLUSIVE_BOUND ? + ChangeType.RANGE_UPPER_BOUND : + ChangeType.INCLUSIVE_RANGE_UPPER_BOUND); + } + + return toPB(); + } + + public RowOperationsPB encodeLowerAndUpperBounds(PartialRow lowerBound, + PartialRow upperBound, + RangePartitionBound lowerBoundType, + RangePartitionBound upperBoundType) { + init(lowerBound.getSchema(), 2); + encodeRow(lowerBound, + lowerBoundType == RangePartitionBound.INCLUSIVE_BOUND ? + ChangeType.RANGE_LOWER_BOUND : + ChangeType.EXCLUSIVE_RANGE_LOWER_BOUND); + encodeRow(upperBound, + upperBoundType == RangePartitionBound.EXCLUSIVE_BOUND ? + ChangeType.RANGE_UPPER_BOUND : + ChangeType.INCLUSIVE_RANGE_UPPER_BOUND); + return toPB(); + } + } + + static class OperationsDecoder { + private Schema schema; + private int columnBitmapSize; + private int columnCount; + + /** + * Utility method to initialize internals of the OperationsDecoder class. + * + * @param schema table's schema used for encoding/decoding of the data + */ + private void init(Schema schema) { + this.schema = schema; + this.columnCount = schema.getColumnCount(); + this.columnBitmapSize = Bytes.getBitSetSize(this.columnCount); + } + + /** + * Decode range partitions from the 'pb' message, assuming the data has been + * encoded using the table schema in 'schema'. 
+ * + * @param pb the encoded data + * @param schema the table schema to use for decoding + * @return a list of PangePartition objects with corresponding bounds + */ + public List decodeRangePartitions( + RowOperationsPB pb, Schema schema) { + if (pb == null) { + return null; + } + if (!pb.hasRows()) { + throw new IllegalArgumentException("row operation PB lacks 'rows' field"); + } + + init(schema); + + ByteBuffer rowsBuf = ByteBuffer.wrap(pb.getRows().toByteArray()); + rowsBuf.order(ByteOrder.LITTLE_ENDIAN); + final byte[] indirectData = pb.getIndirectData().toByteArray(); + + List> decodedBounds = + new ArrayList<>(); + while (rowsBuf.hasRemaining()) { + decodedBounds.add(decodeBound(rowsBuf, indirectData)); + } + + if (decodedBounds.size() % 2 != 0) { + throw new IllegalArgumentException( + "unexpected odd number of range partition bounds"); + } + + List result = new ArrayList<>(); + for (int i = 0; i < decodedBounds.size(); i += 2) { + Pair lower = decodedBounds.get(i); + Pair upper = decodedBounds.get(i + 1); + + RangePartitionBound lowerType; + if (lower.getSecond() == RowOperationsPB.Type.EXCLUSIVE_RANGE_LOWER_BOUND) { + lowerType = RangePartitionBound.EXCLUSIVE_BOUND; + } else if (lower.getSecond() == RowOperationsPB.Type.RANGE_LOWER_BOUND) { + lowerType = RangePartitionBound.INCLUSIVE_BOUND; + } else { + throw new IllegalArgumentException(String.format( + "%s: unexpected bound type for the lower bound", lower.getSecond().toString())); + } + + RangePartitionBound upperType; + if (upper.getSecond() == RowOperationsPB.Type.INCLUSIVE_RANGE_UPPER_BOUND) { + upperType = RangePartitionBound.INCLUSIVE_BOUND; + } else if (upper.getSecond() == RowOperationsPB.Type.RANGE_UPPER_BOUND) { + upperType = RangePartitionBound.EXCLUSIVE_BOUND; + } else { + throw new IllegalArgumentException(String.format( + "%s: unexpected bound type for the upper bound", upper.getSecond().toString())); + } + + result.add(new RangePartition(lower.getFirst(), upper.getFirst(), lowerType, upperType)); + } + return result; + } + + /** + * Decode lower and upper range bounds encoded with the RowOperationsPB + * conventions. + * + * @param rowsBuf byte buffer wrapping RowOperationsPB.rows + * @param indirectData byte array of the RowOperationsPB.indirect_data field + * @return a pair: decoded bound as PartialRow and the type of the bound + */ + public Pair decodeBound( + ByteBuffer rowsBuf, byte[] indirectData) { + RowOperationsPB.Type opType; + final byte opTypeEncoded = rowsBuf.get(); + switch (opTypeEncoded) { + case RowOperationsPB.Type.EXCLUSIVE_RANGE_LOWER_BOUND_VALUE: + opType = RowOperationsPB.Type.EXCLUSIVE_RANGE_LOWER_BOUND; + break; + case RowOperationsPB.Type.RANGE_LOWER_BOUND_VALUE: + opType = RowOperationsPB.Type.RANGE_LOWER_BOUND; + break; + case RowOperationsPB.Type.INCLUSIVE_RANGE_UPPER_BOUND_VALUE: + opType = RowOperationsPB.Type.INCLUSIVE_RANGE_UPPER_BOUND; + break; + case RowOperationsPB.Type.RANGE_UPPER_BOUND_VALUE: + opType = RowOperationsPB.Type.RANGE_UPPER_BOUND; + break; + default: + throw new IllegalArgumentException(String.format( + "%d: unexpected operation type", opTypeEncoded)); + } + + // Read the 'isset' column bitmap. + byte[] columnsBitArray = new byte[columnBitmapSize]; + rowsBuf.get(columnsBitArray); + BitSet columnsBitSet = Bytes.toBitSet(columnsBitArray, 0, 8 * columnBitmapSize); + + // If present, read the 'null' column bitmap. 
+ BitSet nullsBitSet = null; + if (schema.hasNullableColumns()) { + byte[] columnsNullArray = new byte[columnBitmapSize]; + rowsBuf.get(columnsNullArray); + nullsBitSet = Bytes.toBitSet(columnsNullArray, 0, 8 * columnBitmapSize); + } + + // Construct the PartialRow object to contain the boundary as decoded. + PartialRow resultRow = schema.newPartialRow(); + for (int i = 0; i < columnsBitSet.size(); ++i) { + // Read the columns which has been set to a non-null value. + if (columnsBitSet.get(i) && (nullsBitSet == null || !nullsBitSet.get(i))) { + final ColumnSchema columnSchema = schema.getColumnByIndex(i); + if (columnSchema.getType().isFixedSize()) { + // The data for fixed-size types is read from the 'rowsBuf' buffer. + byte[] columnData = new byte[columnSchema.getTypeSize()]; + rowsBuf.get(columnData); + resultRow.setRaw(i, columnData); + } else { + // The data for variable-size types is read from the 'indirectData' + // byte array. + final int indirectOffset = (int)rowsBuf.getLong(); + final int indirectSize = (int)rowsBuf.getLong(); + + ByteBuffer auxBuf = ByteBuffer.wrap( + indirectData, indirectOffset, indirectSize); + auxBuf.order(ByteOrder.LITTLE_ENDIAN); + + byte[] columnData = new byte[indirectSize]; + auxBuf.get(columnData); + resultRow.setRaw(i, columnData); + } + } + + } + return new Pair(resultRow, opType); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/OperationResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/OperationResponse.java new file mode 100644 index 0000000000..ca46387bfd --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/OperationResponse.java @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; +import javax.annotation.Nullable; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.tserver.Tserver; + +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class OperationResponse extends KuduRpcResponse { + + private final long writeTimestamp; + private final RowError rowError; + private final Operation operation; + private final ResourceMetrics writeOpMetrics; + + /** + * Package-private constructor to build an OperationResponse with a row error in the pb format. 
+ * @param elapsedMillis time in milliseconds since RPC creation to now + * @param writeTimestamp HT's write timestamp + * @param operation the operation that created this response + * @param errorPB a row error in pb format, can be null + */ + OperationResponse(long elapsedMillis, + String tsUUID, + long writeTimestamp, + Operation operation, + Tserver.WriteResponsePB.PerRowErrorPB errorPB, + Tserver.ResourceMetricsPB metricsPB) { + super(elapsedMillis, tsUUID); + this.writeTimestamp = writeTimestamp; + this.rowError = errorPB == null ? null : RowError.fromRowErrorPb(errorPB, operation, tsUUID); + this.operation = operation; + this.writeOpMetrics = metricsPB == null ? + null : ResourceMetrics.fromResourceMetricsPB(metricsPB); + } + + /** + * Package-private constructor to build an OperationResponse with a row error. + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param writeTimestamp HT's write timestamp + * @param operation the operation that created this response + * @param rowError a parsed row error, can be null + */ + OperationResponse(long elapsedMillis, + String tsUUID, + long writeTimestamp, + Operation operation, + RowError rowError) { + super(elapsedMillis, tsUUID); + this.writeTimestamp = writeTimestamp; + this.rowError = rowError; + this.operation = operation; + this.writeOpMetrics = null; + } + + /** + * Utility method that collects all the row errors from the given list of responses. + * @param responses a list of operation responses to collect the row errors from + * @return a combined list of row errors + */ + public static List collectErrors(List responses) { + List errors = new ArrayList<>(responses.size()); + for (OperationResponse resp : responses) { + if (resp.hasRowError()) { + errors.add(resp.getRowError()); + } + } + return errors; + } + + /** + * Gives the write timestamp that was returned by the Tablet Server. + * @return a long representing a HybridTime-encoded timestamp + */ + @InterfaceAudience.Private + public long getWriteTimestampRaw() { + return writeTimestamp; + } + + /** + * Returns a row error. If {@link #hasRowError()} returns false, then this method returns null. + * @return a row error, or null if the operation was successful + */ + public RowError getRowError() { + return rowError; + } + + /** + * Tells if this operation response contains a row error. + * @return true if this operation response has errors, else false + */ + public boolean hasRowError() { + return rowError != null; + } + + /** + * Returns the operation associated with this response. + * @return an operation, cannot be null + */ + Operation getOperation() { + return operation; + } + + /** + * Return the write operation metrics associated with this batch. + * @return write operation metrics associated with this batch, or null if there is none. + */ + @Nullable + ResourceMetrics getWriteOpMetrics() { + return this.writeOpMetrics; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java new file mode 100644 index 0000000000..e41e7033a0 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java @@ -0,0 +1,1929 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.List; +import java.util.ListIterator; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.DecimalUtil; +import org.apache.kudu.util.StringUtil; +import org.apache.kudu.util.TimestampUtil; + +/** + * Class used to represent parts of a row along with its schema.

+ * + * Values can be replaced as often as needed, but once the enclosing {@link Operation} is applied + * then they cannot be changed again. This means that a PartialRow cannot be reused.

+ * + * Each PartialRow is backed by a byte array where all the cells (except strings and binary data) + * are written. The others are kept in a List.

+ * + * This class isn't thread-safe. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class PartialRow { + + private final Schema schema; + + // Variable length data. If string, will be UTF-8 encoded. Elements of this list _must_ have a + // mark that we can reset() to. Readers of these fields (encoders, etc) must call reset() before + // attempting to read these values. + private final List varLengthData; + private final byte[] rowAlloc; + + private final BitSet columnsBitSet; + private final BitSet nullsBitSet; + + private boolean frozen = false; + + /** + * This is not a stable API, prefer using {@link Schema#newPartialRow()} + * to create a new partial row. + * @param schema the schema to use for this row + */ + public PartialRow(Schema schema) { + this.schema = schema; + this.columnsBitSet = new BitSet(this.schema.getColumnCount()); + this.nullsBitSet = schema.hasNullableColumns() ? + new BitSet(this.schema.getColumnCount()) : null; + this.rowAlloc = new byte[schema.getRowSize()]; + // Pre-fill the array with nulls. We'll only replace cells that have varlen values. + this.varLengthData = Arrays.asList(new ByteBuffer[this.schema.getColumnCount()]); + } + + /** + * Creates a new partial row by deep-copying the data-fields of the provided partial row. + * @param row the partial row to copy + */ + PartialRow(PartialRow row) { + this.schema = row.schema; + + this.varLengthData = Lists.newArrayListWithCapacity(row.varLengthData.size()); + for (ByteBuffer data: row.varLengthData) { + if (data == null) { + this.varLengthData.add(null); + } else { + data.reset(); + // Deep copy the ByteBuffer. + ByteBuffer clone = ByteBuffer.allocate(data.remaining()); + clone.put(data); + clone.flip(); + + clone.mark(); // We always expect a mark. + this.varLengthData.add(clone); + } + } + + this.rowAlloc = row.rowAlloc.clone(); + this.columnsBitSet = (BitSet) row.columnsBitSet.clone(); + this.nullsBitSet = row.nullsBitSet == null ? null : (BitSet) row.nullsBitSet.clone(); + } + + /** + * Add a boolean for the specified column. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addBoolean(int columnIndex, boolean val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.BOOL); + rowAlloc[getPositionInRowAllocAndSetBitSet(columnIndex)] = (byte) (val ? 1 : 0); + } + + /** + * Add a boolean for the specified column. 
+ * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addBoolean(String columnName, boolean val) { + addBoolean(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's boolean + * @param columnName name of the column to get data for + * @return a boolean + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public boolean getBoolean(String columnName) { + return getBoolean(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's boolean + * @param columnIndex Column index in the schema + * @return a boolean + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public boolean getBoolean(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.BOOL); + checkValue(columnIndex); + byte b = rowAlloc[schema.getColumnOffset(columnIndex)]; + return b == 1; + } + + /** + * Add a byte for the specified column. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addByte(int columnIndex, byte val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.INT8); + rowAlloc[getPositionInRowAllocAndSetBitSet(columnIndex)] = val; + } + + /** + * Add a byte for the specified column. + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addByte(String columnName, byte val) { + addByte(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's byte + * @param columnName name of the column to get data for + * @return a byte + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public byte getByte(String columnName) { + return getByte(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's byte + * @param columnIndex Column index in the schema + * @return a byte + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public byte getByte(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.INT8); + checkValue(columnIndex); + return rowAlloc[schema.getColumnOffset(columnIndex)]; + } + + /** + * Add a short for the specified column. 
+ * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addShort(int columnIndex, short val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.INT16); + Bytes.setShort(rowAlloc, val, getPositionInRowAllocAndSetBitSet(columnIndex)); + } + + /** + * Add a short for the specified column. + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addShort(String columnName, short val) { + addShort(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's short + * @param columnName name of the column to get data for + * @return a short + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public short getShort(String columnName) { + return getShort(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's short + * @param columnIndex Column index in the schema + * @return a short + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public short getShort(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.INT16); + checkValue(columnIndex); + return Bytes.getShort(rowAlloc, schema.getColumnOffset(columnIndex)); + } + + /** + * Add an int for the specified column. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addInt(int columnIndex, int val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.INT32); + Bytes.setInt(rowAlloc, val, getPositionInRowAllocAndSetBitSet(columnIndex)); + } + + /** + * Add an int for the specified column. 
+ * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addInt(String columnName, int val) { + addInt(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's integer + * @param columnName name of the column to get data for + * @return an integer + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public int getInt(String columnName) { + return getInt(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's integer + * @param columnIndex Column index in the schema + * @return an integer + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public int getInt(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.INT32); + checkValue(columnIndex); + return Bytes.getInt(rowAlloc, schema.getColumnOffset(columnIndex)); + } + + /** + * Add an long for the specified column. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addLong(int columnIndex, long val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.INT64, Type.UNIXTIME_MICROS); + Bytes.setLong(rowAlloc, val, getPositionInRowAllocAndSetBitSet(columnIndex)); + } + + /** + * Add an long for the specified column. + * + * If this is a UNIXTIME_MICROS column, the long value provided should be the number of + * microseconds between a given time and January 1, 1970 UTC. + * For example, to encode the current time, use setLong(System.currentTimeMillis() * 1000); + * + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addLong(String columnName, long val) { + addLong(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's long + * + * If this is a UNIXTIME_MICROS column, the long value corresponds to a number of microseconds + * since midnight, January 1, 1970 UTC. + * + * @param columnName name of the column to get data for + * @return a long + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public long getLong(String columnName) { + return getLong(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's long + * + * If this is a UNIXTIME_MICROS column, the long value corresponds to a number of microseconds + * since midnight, January 1, 1970 UTC. 
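To make the microsecond convention concrete, a small illustrative example; the single-column schema and the column name "ts" are made up, and both calls below store the same instant.

[source,java]
----
import java.sql.Timestamp;
import java.util.Collections;

import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.Type;
import org.apache.kudu.client.PartialRow;

// Illustrative: writing a UNIXTIME_MICROS value either as raw microseconds or as a Timestamp.
final class TimestampSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(Collections.singletonList(
        new ColumnSchema.ColumnSchemaBuilder("ts", Type.UNIXTIME_MICROS).key(true).build()));
    PartialRow row = schema.newPartialRow();
    long nowMillis = System.currentTimeMillis();
    row.addLong("ts", nowMillis * 1000L);              // microseconds since the Unix epoch
    row.addTimestamp("ts", new Timestamp(nowMillis));  // same instant; replaces the value above
  }
}
----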
+ * + * @param columnIndex Column index in the schema + * @return a long + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public long getLong(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.INT64, Type.UNIXTIME_MICROS); + checkColumnExists(schema.getColumnByIndex(columnIndex)); + checkValue(columnIndex); + return Bytes.getLong(rowAlloc, schema.getColumnOffset(columnIndex)); + } + + /** + * Add an float for the specified column. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addFloat(int columnIndex, float val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.FLOAT); + Bytes.setFloat(rowAlloc, val, getPositionInRowAllocAndSetBitSet(columnIndex)); + } + + /** + * Add an float for the specified column. + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addFloat(String columnName, float val) { + addFloat(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's float + * @param columnName name of the column to get data for + * @return a float + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public float getFloat(String columnName) { + return getFloat(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's float + * @param columnIndex Column index in the schema + * @return a float + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public float getFloat(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.FLOAT); + checkValue(columnIndex); + return Bytes.getFloat(rowAlloc, schema.getColumnOffset(columnIndex)); + } + + /** + * Add an double for the specified column. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addDouble(int columnIndex, double val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.DOUBLE); + Bytes.setDouble(rowAlloc, val, getPositionInRowAllocAndSetBitSet(columnIndex)); + } + + /** + * Add an double for the specified column. 
+ * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addDouble(String columnName, double val) { + addDouble(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's double + * @param columnName name of the column to get data for + * @return a double + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public double getDouble(String columnName) { + return getDouble(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's double + * @param columnIndex Column index in the schema + * @return a double + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public double getDouble(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.DOUBLE); + checkValue(columnIndex); + return Bytes.getDouble(rowAlloc, schema.getColumnOffset(columnIndex)); + } + + /** + * Add a Decimal for the specified column. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addDecimal(int columnIndex, BigDecimal val) { + checkNotFrozen(); + ColumnSchema column = schema.getColumnByIndex(columnIndex); + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + checkColumn(column, Type.DECIMAL); + BigDecimal coercedVal = DecimalUtil.coerce(val,typeAttributes.getPrecision(), + typeAttributes.getScale()); + Bytes.setBigDecimal(rowAlloc, coercedVal, typeAttributes.getPrecision(), + getPositionInRowAllocAndSetBitSet(columnIndex)); + } + + /** + * Add a Decimal for the specified column. + * + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addDecimal(String columnName, BigDecimal val) { + addDecimal(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's BigDecimal + * + * @param columnName name of the column to get data for + * @return a BigDecimal + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public BigDecimal getDecimal(String columnName) { + return getDecimal(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's Decimal. 
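+ *
+ * For example, assuming a DECIMAL(8, 2) column named "price" (both the attributes and the
+ * name are illustrative):
+ * <pre>{@code
+ *   row.addDecimal("price", new BigDecimal("19.99"));
+ *   BigDecimal price = row.getDecimal("price");
+ * }</pre>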
+ * + * @param columnIndex Column index in the schema + * @return a BigDecimal + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public BigDecimal getDecimal(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.DECIMAL); + checkColumnExists(schema.getColumnByIndex(columnIndex)); + checkValue(columnIndex); + ColumnSchema column = schema.getColumnByIndex(columnIndex); + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + return Bytes.getDecimal(rowAlloc, schema.getColumnOffset(columnIndex), + typeAttributes.getPrecision(), typeAttributes.getScale()); + } + + /** + * Add a Timestamp for the specified column. + * + * Note: Timestamp instances with nanosecond precision are truncated to microseconds. + * + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addTimestamp(int columnIndex, Timestamp val) { + checkNotFrozen(); + ColumnSchema column = schema.getColumnByIndex(columnIndex); + checkColumn(column, Type.UNIXTIME_MICROS); + long micros = TimestampUtil.timestampToMicros(val); + Bytes.setLong(rowAlloc, micros, getPositionInRowAllocAndSetBitSet(columnIndex)); + } + + /** + * Add a Timestamp for the specified column. + * + * Note: Timestamp instances with nanosecond precision are truncated to microseconds. + * + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addTimestamp(String columnName, Timestamp val) { + addTimestamp(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's Timestamp. + * + * @param columnName name of the column to get data for + * @return a Timestamp + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public Timestamp getTimestamp(String columnName) { + return getTimestamp(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's Timestamp. + * + * @param columnIndex Column index in the schema + * @return a Timestamp + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public Timestamp getTimestamp(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.UNIXTIME_MICROS); + checkColumnExists(schema.getColumnByIndex(columnIndex)); + checkValue(columnIndex); + long micros = Bytes.getLong(rowAlloc, schema.getColumnOffset(columnIndex)); + return TimestampUtil.microsToTimestamp(micros); + } + + /** + * Add a sql.Date for the specified column. 
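+ *
+ * For example, assuming column 0 is a DATE column:
+ * <pre>{@code
+ *   row.addDate(0, java.sql.Date.valueOf("2024-01-15"));
+ * }</pre>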
+ * + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addDate(int columnIndex, Date val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.DATE); + int days = DateUtil.sqlDateToEpochDays(val); + Bytes.setInt(rowAlloc, days, getPositionInRowAllocAndSetBitSet(columnIndex)); + } + + /** + * Add a Date for the specified column. + * + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addDate(String columnName, Date val) { + addDate(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's Date. + * + * @param columnName name of the column to get data for + * @return a Date + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public Date getDate(String columnName) { + return getDate(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's Date. + * + * @param columnIndex Column index in the schema + * @return a Date + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public Date getDate(int columnIndex) { + checkColumnExists(schema.getColumnByIndex(columnIndex)); + checkColumn(schema.getColumnByIndex(columnIndex), Type.DATE); + checkValue(columnIndex); + int days = Bytes.getInt(rowAlloc, schema.getColumnOffset(columnIndex)); + return DateUtil.epochDaysToSqlDate(days); + } + + /** + * Add a String for the specified column. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addString(int columnIndex, String val) { + addStringUtf8(columnIndex, Bytes.fromString(val)); + } + + /** + * Add a String for the specified column. + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addString(String columnName, String val) { + addStringUtf8(columnName, Bytes.fromString(val)); + } + + /** + * Add a VARCHAR for the specified column. + * + * Truncates val to the length of the column in characters. 
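+ *
+ * For example, adding {@code "abcdef"} to a VARCHAR(3) column stores {@code "abc"}.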
+ * + * @param columnIndex Index of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist, is the wrong type + * or the string is not UTF-8 + * @throws IllegalStateException if the row was already applied + */ + public void addVarchar(int columnIndex, String val) { + ColumnSchema column = schema.getColumnByIndex(columnIndex); + checkColumn(column, Type.VARCHAR); + checkNotFrozen(); + int length = column.getTypeAttributes().getLength(); + if (length < val.length()) { + val = val.substring(0, length); + } + byte[] bytes = Bytes.fromString(val); + addVarLengthData(columnIndex, bytes); + } + + /** + * Add a VARCHAR for the specified column. + * + * Truncates val to the length of the column in characters. + * + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist, is the wrong type + * or the string is not UTF-8 + * @throws IllegalStateException if the row was already applied + */ + public void addVarchar(String columnName, String val) { + addVarchar(schema.getColumnIndex(columnName), val); + } + + /** + * Get the specified column's string. + * @param columnName name of the column to get data for + * @return a string + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public String getString(String columnName) { + return getString(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's string. + * @param columnIndex Column index in the schema + * @return a string + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public String getString(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.STRING); + checkValue(columnIndex); + return new String(getVarLengthData(columnIndex).array(), StandardCharsets.UTF_8); + } + + /** + * Get the specified column's VARCHAR. + * @param columnName Name of the column to get the data for + * @return a VARCHAR + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public String getVarchar(String columnName) { + return getVarchar(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's VARCHAR. + * @param columnIndex Column index in the schema + * @return a VARCHAR + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public String getVarchar(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.VARCHAR); + checkValue(columnIndex); + return new String(getVarLengthData(columnIndex).array(), StandardCharsets.UTF_8); + } + + /** + * Add a String for the specified value, encoded as UTF8. + * Note that the provided value must not be mutated after this. 
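+ *
+ * For example, assuming column 0 is a STRING column:
+ * <pre>{@code
+ *   row.addStringUtf8(0, "résumé".getBytes(StandardCharsets.UTF_8));
+ * }</pre>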
+ * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addStringUtf8(int columnIndex, byte[] val) { + // TODO: use Utf8.isWellFormed from Guava 16 to verify that. + // the user isn't putting in any garbage data. + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.STRING); + addVarLengthData(columnIndex, val); + } + + /** + * Add a String for the specified value, encoded as UTF8. + * Note that the provided value must not be mutated after this. + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * + */ + public void addStringUtf8(String columnName, byte[] val) { + addStringUtf8(schema.getColumnIndex(columnName), val); + } + + /** + * Add binary data with the specified value. + * Note that the provided value must not be mutated after this. + * @param columnIndex the column's index in the schema + * @param val value to add + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addBinary(int columnIndex, byte[] val) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.BINARY); + addVarLengthData(columnIndex, val); + } + + /** + * Add binary data with the specified value, from the current ByteBuffer's position to its limit. + * This method duplicates the ByteBuffer but doesn't copy the data. This means that the wrapped + * data must not be mutated after this. + * @param columnIndex the column's index in the schema + * @param value byte buffer to get the value from + * @throws IllegalArgumentException if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addBinary(int columnIndex, ByteBuffer value) { + checkNotFrozen(); + checkColumn(schema.getColumnByIndex(columnIndex), Type.BINARY); + addVarLengthData(columnIndex, value); + } + + /** + * Add binary data with the specified value. + * Note that the provided value must not be mutated after this. + * @param columnName Name of the column + * @param val value to add + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addBinary(String columnName, byte[] val) { + addBinary(schema.getColumnIndex(columnName), val); + } + + /** + * Add binary data with the specified value, from the current ByteBuffer's position to its limit. + * This method duplicates the ByteBuffer but doesn't copy the data. This means that the wrapped + * data must not be mutated after this. 
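+ *
+ * For example, assuming a BINARY column named "payload" and an existing {@code byte[] buf}
+ * (both illustrative):
+ * <pre>{@code
+ *   ByteBuffer slice = ByteBuffer.wrap(buf, 16, 64); // only [position, limit) is added
+ *   row.addBinary("payload", slice);
+ * }</pre>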
+ * @param columnName Name of the column + * @param value byte buffer to get the value from + * @throws IllegalArgumentException if the column doesn't exist + * or if the value doesn't match the column's type + * @throws IllegalStateException if the row was already applied + */ + public void addBinary(String columnName, ByteBuffer value) { + addBinary(schema.getColumnIndex(columnName), value); + } + + /** + * Get a copy of the specified column's binary data. + * @param columnName name of the column to get data for + * @return a byte[] with the binary data. + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public byte[] getBinaryCopy(String columnName) { + return getBinaryCopy(this.schema.getColumnIndex(columnName)); + } + + /** + * Get a copy of the specified column's binary data. + * @param columnIndex Column index in the schema + * @return a byte[] with the binary data. + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public byte[] getBinaryCopy(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.BINARY); + checkValue(columnIndex); + byte[] data = getVarLengthData(columnIndex).array(); + byte[] ret = new byte[data.length]; + System.arraycopy(data, 0, ret, 0, data.length); + return ret; + } + + /** + * Get the specified column's binary data. + * + * This doesn't copy the data and instead returns a ByteBuffer that wraps it. + * + * @param columnName name of the column to get data for + * @return a ByteBuffer with the binary data. + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public ByteBuffer getBinary(String columnName) { + return getBinary(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's binary data. + * + * This doesn't copy the data and instead returns a ByteBuffer that wraps it. + * + * @param columnIndex Column index in the schema + * @return a ByteBuffer with the binary data. + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public ByteBuffer getBinary(int columnIndex) { + checkColumn(schema.getColumnByIndex(columnIndex), Type.BINARY); + checkValue(columnIndex); + return getVarLengthData(columnIndex); + } + + private void addVarLengthData(int columnIndex, byte[] val) { + addVarLengthData(columnIndex, ByteBuffer.wrap(val)); + } + + private void addVarLengthData(int columnIndex, ByteBuffer val) { + // A duplicate will copy all the original's metadata but still point to the same content. + ByteBuffer duplicate = val.duplicate(); + // Mark the current position so we can reset to it. + duplicate.mark(); + + varLengthData.set(columnIndex, duplicate); + // Set the usage bit but we don't care where it is. + getPositionInRowAllocAndSetBitSet(columnIndex); + // We don't set anything in row alloc, it will be managed at encoding time. + } + + /** + * Get the list variable length data cells that were added to this row. 
+ * @return a list of binary data, may be empty + */ + List getVarLengthData() { + return varLengthData; + } + + private ByteBuffer getVarLengthData(int columnIndex) { + return varLengthData.get(columnIndex).duplicate(); + } + + /** + * Set the specified column to null + * @param columnIndex the column's index in the schema + * @throws IllegalArgumentException if the column doesn't exist or cannot be set to null + * @throws IllegalStateException if the row was already applied + */ + public void setNull(int columnIndex) { + setNull(this.schema.getColumnByIndex(columnIndex)); + } + + /** + * Set the specified column to null + * @param columnName Name of the column + * @throws IllegalArgumentException if the column doesn't exist or cannot be set to null + * @throws IllegalStateException if the row was already applied + */ + public void setNull(String columnName) { + setNull(this.schema.getColumn(columnName)); + } + + private void setNull(ColumnSchema column) { + assert nullsBitSet != null; + checkNotFrozen(); + checkColumnExists(column); + if (!column.isNullable()) { + throw new IllegalArgumentException(column.getName() + " cannot be set to null"); + } + int idx = schema.getColumns().indexOf(column); + columnsBitSet.set(idx); + nullsBitSet.set(idx); + } + + /** + * Get if the specified column is NULL + * @param columnName name of the column in the schema + * @return true if the column cell is null and the column is nullable, + * false otherwise + * @throws IllegalArgumentException if the column doesn't exist + */ + public boolean isNull(String columnName) { + return isNull(this.schema.getColumnIndex(columnName)); + } + + /** + * Get if the specified column is NULL + * @param columnIndex Column index in the schema + * @return true if the column cell is null and the column is nullable, + * false otherwise + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public boolean isNull(int columnIndex) { + checkColumnExists(schema.getColumnByIndex(columnIndex)); + return schema.getColumnByIndex(columnIndex).isNullable() && isSetToNull(columnIndex); + } + + /** + * Add the specified column's value as an Object. + * + * This method is useful when you don't care about autoboxing + * and your existing type handling logic is based on Java types. + * + * The accepted Object type is based on the column's {@link Type}: + * Type.BOOL -> java.lang.Boolean + * Type.INT8 -> java.lang.Byte + * Type.INT16 -> java.lang.Short + * Type.INT32 -> java.lang.Integer + * Type.INT64 -> java.lang.Long + * Type.UNIXTIME_MICROS -> java.sql.Timestamp or java.lang.Long + * Type.FLOAT -> java.lang.Float + * Type.DOUBLE -> java.lang.Double + * Type.STRING -> java.lang.String + * Type.VARCHAR -> java.lang.String + * Type.BINARY -> byte[] or java.lang.ByteBuffer + * Type.DECIMAL -> java.math.BigDecimal + * + * @param columnName name of the column in the schema + * @param val the value to add as an Object + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addObject(String columnName, Object val) { + addObject(this.schema.getColumnIndex(columnName), val); + } + + /** + * Add the specified column's value as an Object. + * + * This method is useful when you don't care about autoboxing + * and your existing type handling logic is based on Java types. 
+ * + * The accepted Object type is based on the column's {@link Type}: + * Type.BOOL -> java.lang.Boolean + * Type.INT8 -> java.lang.Byte + * Type.INT16 -> java.lang.Short + * Type.INT32 -> java.lang.Integer + * Type.INT64 -> java.lang.Long + * Type.UNIXTIME_MICROS -> java.sql.Timestamp or java.lang.Long + * Type.FLOAT -> java.lang.Float + * Type.DOUBLE -> java.lang.Double + * Type.STRING -> java.lang.String + * Type.VARCHAR -> java.lang.String + * Type.BINARY -> byte[] or java.lang.ByteBuffer + * Type.DECIMAL -> java.math.BigDecimal + * Type.DATE -> java.sql.Date + * + * @param columnIndex column index in the schema + * @param val the value to add as an Object + * @throws IllegalStateException if the row was already applied + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public void addObject(int columnIndex, Object val) { + checkNotFrozen(); + ColumnSchema col = schema.getColumnByIndex(columnIndex); + checkColumnExists(col); + try { + if (val == null) { + setNull(columnIndex); + return; + } + switch (col.getType()) { + case BOOL: + addBoolean(columnIndex, (Boolean) val); + break; + case INT8: + addByte(columnIndex, (Byte) val); + break; + case INT16: + addShort(columnIndex, (Short) val); + break; + case INT32: + addInt(columnIndex, (Integer) val); + break; + case INT64: + addLong(columnIndex, (Long) val); + break; + case UNIXTIME_MICROS: + if (val instanceof Timestamp) { + addTimestamp(columnIndex, (Timestamp) val); + } else { + addLong(columnIndex, (Long) val); + } + break; + case FLOAT: + addFloat(columnIndex, (Float) val); + break; + case DOUBLE: + addDouble(columnIndex, (Double) val); + break; + case STRING: + addString(columnIndex, (String) val); + break; + case VARCHAR: + addVarchar(columnIndex, (String) val); + break; + case DATE: + addDate(columnIndex, (Date) val); + break; + case BINARY: + if (val instanceof byte[]) { + addBinary(columnIndex, (byte[]) val); + } else { + addBinary(columnIndex, (ByteBuffer) val); + } + break; + case DECIMAL: + addDecimal(columnIndex, (BigDecimal) val); + break; + default: + throw new IllegalArgumentException("Unsupported column type: " + col.getType()); + } + } catch (ClassCastException e) { + throw new IllegalArgumentException( + "Value type does not match column type " + col.getType() + + " for column " + col.getName()); + } + } + + /** + * Get the specified column's value as an Object. + * + * This method is useful when you don't care about autoboxing + * and your existing type handling logic is based on Java types. + * + * The Object type is based on the column's {@link Type}: + * Type.BOOL -> java.lang.Boolean + * Type.INT8 -> java.lang.Byte + * Type.INT16 -> java.lang.Short + * Type.INT32 -> java.lang.Integer + * Type.INT64 -> java.lang.Long + * Type.UNIXTIME_MICROS -> java.sql.Timestamp + * Type.FLOAT -> java.lang.Float + * Type.DOUBLE -> java.lang.Double + * Type.STRING -> java.lang.String + * Type.VARCHAR -> java.lang.String + * Type.BINARY -> byte[] + * Type.DECIMAL -> java.math.BigDecimal + * Type.DATE -> java.sql.Date + * + * @param columnName name of the column in the schema + * @return the column's value as an Object, null if the column value is null or unset + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public Object getObject(String columnName) { + return getObject(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's value as an Object. 
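+ *
+ * Together with {@code addObject}, this allows copying cells generically between two rows
+ * that share a schema, for example (where {@code src} and {@code dst} are such rows):
+ * <pre>{@code
+ *   for (int i = 0; i < src.getSchema().getColumnCount(); i++) {
+ *     if (src.isSet(i) && !src.isNull(i)) {
+ *       dst.addObject(i, src.getObject(i));
+ *     }
+ *   }
+ * }</pre>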
+ * + * This method is useful when you don't care about autoboxing + * and your existing type handling logic is based on Java types. + * + * The Object type is based on the column's {@link Type}: + * Type.BOOL -> java.lang.Boolean + * Type.INT8 -> java.lang.Byte + * Type.INT16 -> java.lang.Short + * Type.INT32 -> java.lang.Integer + * Type.INT64 -> java.lang.Long + * Type.UNIXTIME_MICROS -> java.sql.Timestamp + * Type.FLOAT -> java.lang.Float + * Type.DOUBLE -> java.lang.Double + * Type.STRING -> java.lang.String + * Type.VARCHAR -> java.lang.String + * Type.BINARY -> byte[] + * Type.DECIMAL -> java.math.BigDecimal + * + * @param columnIndex Column index in the schema + * @return the column's value as an Object, null if the column value is null or unset + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public Object getObject(int columnIndex) { + checkColumnExists(schema.getColumnByIndex(columnIndex)); + if (isNull(columnIndex) || !isSet(columnIndex)) { + return null; + } + Type type = schema.getColumnByIndex(columnIndex).getType(); + switch (type) { + case BOOL: return getBoolean(columnIndex); + case INT8: return getByte(columnIndex); + case INT16: return getShort(columnIndex); + case INT32: return getInt(columnIndex); + case INT64: return getLong(columnIndex); + case DATE: return getDate(columnIndex); + case UNIXTIME_MICROS: return getTimestamp(columnIndex); + case FLOAT: return getFloat(columnIndex); + case DOUBLE: return getDouble(columnIndex); + case VARCHAR: return getVarchar(columnIndex); + case STRING: return getString(columnIndex); + case BINARY: return getBinaryCopy(columnIndex); + case DECIMAL: return getDecimal(columnIndex); + default: throw new UnsupportedOperationException("Unsupported type: " + type); + } + } + + /** + * Verifies if the column exists and belongs to one of the specified types + * @param column column the user wants to set + * @param types types we expect + * @throws IllegalArgumentException if the column or type was invalid + */ + private void checkColumn(ColumnSchema column, Type... types) { + checkColumnExists(column); + for (Type type : types) { + if (column.getType().equals(type)) { + return; + } + } + throw new IllegalArgumentException(String.format("%s isn't %s, it's %s", column.getName(), + Arrays.toString(types), column.getType().getName())); + } + + /** + * @param column column the user wants to set + * @throws IllegalArgumentException if the column doesn't exist + */ + private void checkColumnExists(ColumnSchema column) { + if (column == null) { + throw new IllegalArgumentException("Column name isn't present in the table's schema"); + } + } + + /** + * @param columnIndex Column index in the schema + * @throws IllegalArgumentException if the column is unset or null + */ + private void checkValue(int columnIndex) { + if (!isSet(columnIndex)) { + throw new IllegalArgumentException("Column value is not set"); + } + + if (isNull(columnIndex)) { + throw new IllegalArgumentException("Column value is null"); + } + } + + /** + * @throws IllegalStateException if the row was already applied + */ + private void checkNotFrozen() { + if (frozen) { + throw new IllegalStateException("This row was already applied and cannot be modified."); + } + } + + /** + * Sets the column bit set for the column index, and returns the column's offset. 
+ * @param columnIndex the index of the column to get the position for and mark as set + * @return the offset in rowAlloc for the column + */ + private int getPositionInRowAllocAndSetBitSet(int columnIndex) { + columnsBitSet.set(columnIndex); + return schema.getColumnOffset(columnIndex); + } + + /** + * Get if the specified column has been set + * @param columnName name of the column in the schema + * @return true if the column has been set + * @throws IllegalArgumentException if the column doesn't exist + */ + public boolean isSet(String columnName) { + return isSet(this.schema.getColumnIndex(columnName)); + } + + /** + * Get if the specified column has been set + * @param columnIndex Column index in the schema + * @return true if the column has been set + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public boolean isSet(int columnIndex) { + checkColumnExists(schema.getColumnByIndex(columnIndex)); + return this.columnsBitSet.get(columnIndex); + } + + /** + * Tells if the specified column was set to null by the user + * @param column column's index in the schema + * @return true if it was set, else false + */ + boolean isSetToNull(int column) { + if (this.nullsBitSet == null) { + return false; + } + return this.nullsBitSet.get(column); + } + + /** + * Returns the encoded primary key of the row. + * @return a byte array containing an encoded primary key + */ + public byte[] encodePrimaryKey() { + return KeyEncoder.encodePrimaryKey(this); + } + + /** {@inheritDoc} */ + @Override + public String toString() { + int numCols = schema.getColumnCount(); + StringBuilder sb = new StringBuilder(); + sb.append('('); + boolean first = true; + for (int idx = 0; idx < numCols; ++idx) { + if (!columnsBitSet.get(idx)) { + continue; + } + + if (first) { + first = false; + } else { + sb.append(", "); + } + + ColumnSchema col = schema.getColumnByIndex(idx); + sb.append(col.getType().getName()); + if (col.getTypeAttributes() != null) { + sb.append(col.getTypeAttributes().toStringForType(col.getType())); + } + sb.append(' '); + sb.append(col.getName()); + sb.append('='); + + appendCellValueDebugString(idx, sb); + } + sb.append(')'); + return sb.toString(); + } + + /** + * Transforms the row key into a string representation where each column is in the format: + * "type col_name=value". + * @return a string representation of the operation's row key + */ + public String stringifyRowKey() { + int numRowKeys = schema.getPrimaryKeyColumnCount(); + List idxs = new ArrayList<>(numRowKeys); + for (int i = 0; i < numRowKeys; i++) { + idxs.add(i); + } + + StringBuilder sb = new StringBuilder(); + sb.append("("); + appendDebugString(idxs, sb); + sb.append(")"); + return sb.toString(); + } + + /** + * Appends a debug string for the provided columns in the row. + * + * @param idxs the column indexes + * @param sb the string builder to append to + */ + void appendDebugString(List idxs, StringBuilder sb) { + boolean first = true; + for (int idx : idxs) { + if (first) { + first = false; + } else { + sb.append(", "); + } + + ColumnSchema col = schema.getColumnByIndex(idx); + + sb.append(col.getType().getName()); + sb.append(' '); + sb.append(col.getName()); + sb.append('='); + + appendCellValueDebugString(idx, sb); + } + } + + /** + * Appends a short debug string for the provided columns in the row. 
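+ *
+ * Unlike {@code appendDebugString}, only the cell values are appended; an int32 and a
+ * string cell might render as {@code 1, "foo"} (values only, no types or column names).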
+ * + * @param idxs the column indexes + * @param sb the string builder to append to + */ + void appendShortDebugString(List idxs, StringBuilder sb) { + boolean first = true; + for (int idx : idxs) { + if (first) { + first = false; + } else { + sb.append(", "); + } + appendCellValueDebugString(idx, sb); + } + } + + /** + * Appends a debug string for the provided cell value in the row. + * + * @param idx the column index + * @param sb the string builder to append to + */ + void appendCellValueDebugString(Integer idx, StringBuilder sb) { + ColumnSchema col = schema.getColumnByIndex(idx); + Preconditions.checkState(columnsBitSet.get(idx), "Column %s is not set", col.getName()); + + if (nullsBitSet != null && nullsBitSet.get(idx)) { + sb.append("NULL"); + return; + } + + switch (col.getType()) { + case BOOL: + sb.append(Bytes.getBoolean(rowAlloc, schema.getColumnOffset(idx))); + return; + case INT8: + sb.append(Bytes.getByte(rowAlloc, schema.getColumnOffset(idx))); + return; + case INT16: + sb.append(Bytes.getShort(rowAlloc, schema.getColumnOffset(idx))); + return; + case INT32: + sb.append(Bytes.getInt(rowAlloc, schema.getColumnOffset(idx))); + return; + case INT64: + sb.append(Bytes.getLong(rowAlloc, schema.getColumnOffset(idx))); + return; + case DATE: + sb.append(DateUtil.epochDaysToDateString( + Bytes.getInt(rowAlloc, schema.getColumnOffset(idx)))); + return; + case UNIXTIME_MICROS: + sb.append(TimestampUtil.timestampToString( + Bytes.getLong(rowAlloc, schema.getColumnOffset(idx)))); + return; + case FLOAT: + sb.append(Bytes.getFloat(rowAlloc, schema.getColumnOffset(idx))); + return; + case DOUBLE: + sb.append(Bytes.getDouble(rowAlloc, schema.getColumnOffset(idx))); + return; + case DECIMAL: + ColumnTypeAttributes typeAttributes = col.getTypeAttributes(); + sb.append(Bytes.getDecimal(rowAlloc, schema.getColumnOffset(idx), + typeAttributes.getPrecision(), typeAttributes.getScale())); + return; + case VARCHAR: + case BINARY: + case STRING: + ByteBuffer value = getVarLengthData().get(idx).duplicate(); + value.reset(); // Make sure we start at the beginning. + byte[] data = new byte[value.limit() - value.position()]; + value.get(data); + if (col.getType() == Type.STRING || col.getType() == Type.VARCHAR) { + sb.append('"'); + StringUtil.appendEscapedSQLString(Bytes.getString(data), sb); + sb.append('"'); + } else { + sb.append(Bytes.pretty(data)); + } + return; + default: + throw new RuntimeException("unreachable"); + } + } + + /** + * Sets the column to the minimum possible value for the column's type. 
+ * @param index the index of the column to set to the minimum + */ + void setMin(int index) { + ColumnSchema column = schema.getColumnByIndex(index); + Type type = column.getType(); + switch (type) { + case BOOL: + addBoolean(index, false); + break; + case INT8: + addByte(index, Byte.MIN_VALUE); + break; + case INT16: + addShort(index, Short.MIN_VALUE); + break; + case INT32: + addInt(index, Integer.MIN_VALUE); + break; + case DATE: + addDate(index, DateUtil.epochDaysToSqlDate(DateUtil.MIN_DATE_VALUE)); + break; + case INT64: + case UNIXTIME_MICROS: + addLong(index, Long.MIN_VALUE); + break; + case FLOAT: + addFloat(index, -Float.MAX_VALUE); + break; + case DOUBLE: + addDouble(index, -Double.MAX_VALUE); + break; + case DECIMAL: + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + addDecimal(index, + DecimalUtil.minValue(typeAttributes.getPrecision(), typeAttributes.getScale())); + break; + case STRING: + addStringUtf8(index, AsyncKuduClient.EMPTY_ARRAY); + break; + case BINARY: + addBinary(index, AsyncKuduClient.EMPTY_ARRAY); + break; + case VARCHAR: + addVarchar(index, ""); + break; + default: + throw new RuntimeException("unreachable"); + } + } + + /** + * Sets the column to the provided raw value. + * @param index the index of the column to set + * @param value the raw value + */ + void setRaw(int index, byte[] value) { + ColumnSchema column = schema.getColumnByIndex(index); + Type type = column.getType(); + switch (type) { + case BOOL: + case INT8: + case INT16: + case INT32: + case INT64: + case DATE: + case UNIXTIME_MICROS: + case FLOAT: + case DOUBLE: + case DECIMAL: { + Preconditions.checkArgument(value.length == column.getTypeSize()); + System.arraycopy(value, 0, rowAlloc, + getPositionInRowAllocAndSetBitSet(index), value.length); + break; + } + case VARCHAR: + case STRING: + case BINARY: { + addVarLengthData(index, value); + break; + } + default: + throw new RuntimeException("unreachable"); + } + } + + /** + * Increments the column at the given index, returning {@code false} if the + * value is already the maximum. 
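+ *
+ * For example, an INT32 cell holding 5 becomes 6, a STRING or BINARY cell gets a trailing
+ * zero byte appended, and an INT32 cell already at Integer.MAX_VALUE is left unchanged and
+ * {@code false} is returned.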
+ * + * @param index the column index to increment + * @return {@code true} if the column is successfully incremented, or {@code false} if + * it is already the maximum value + */ + @SuppressWarnings("BigDecimalEquals") + boolean incrementColumn(int index) { + ColumnSchema column = schema.getColumnByIndex(index); + Type type = column.getType(); + Preconditions.checkState(isSet(index)); + int offset = schema.getColumnOffset(index); + switch (type) { + case BOOL: { + boolean isFalse = rowAlloc[offset] == 0; + rowAlloc[offset] = 1; + return isFalse; + } + case INT8: { + byte existing = rowAlloc[offset]; + if (existing == Byte.MAX_VALUE) { + return false; + } + rowAlloc[offset] = (byte) (existing + 1); + return true; + } + case INT16: { + short existing = Bytes.getShort(rowAlloc, offset); + if (existing == Short.MAX_VALUE) { + return false; + } + Bytes.setShort(rowAlloc, (short) (existing + 1), offset); + return true; + } + case INT32: { + int existing = Bytes.getInt(rowAlloc, offset); + if (existing == Integer.MAX_VALUE) { + return false; + } + Bytes.setInt(rowAlloc, existing + 1, offset); + return true; + } + case DATE: { + int existing = Bytes.getInt(rowAlloc, offset); + if (existing == DateUtil.MAX_DATE_VALUE) { + return false; + } + Bytes.setInt(rowAlloc, existing + 1, offset); + return true; + } + case INT64: + case UNIXTIME_MICROS: { + long existing = Bytes.getLong(rowAlloc, offset); + if (existing == Long.MAX_VALUE) { + return false; + } + Bytes.setLong(rowAlloc, existing + 1, offset); + return true; + } + case FLOAT: { + float existing = Bytes.getFloat(rowAlloc, offset); + float incremented = Math.nextAfter(existing, Float.POSITIVE_INFINITY); + if (existing == incremented) { + return false; + } + Bytes.setFloat(rowAlloc, incremented, offset); + return true; + } + case DOUBLE: { + double existing = Bytes.getDouble(rowAlloc, offset); + double incremented = Math.nextAfter(existing, Double.POSITIVE_INFINITY); + if (existing == incremented) { + return false; + } + Bytes.setDouble(rowAlloc, incremented, offset); + return true; + } + case DECIMAL: { + int precision = column.getTypeAttributes().getPrecision(); + int scale = column.getTypeAttributes().getScale(); + BigDecimal existing = Bytes.getDecimal(rowAlloc, offset, precision, scale); + BigDecimal max = DecimalUtil.maxValue(precision, scale); + if (existing.equals(max)) { + return false; + } + BigDecimal smallest = DecimalUtil.smallestValue(scale); + Bytes.setBigDecimal(rowAlloc, existing.add(smallest), precision, offset); + return true; + } + case VARCHAR: + case STRING: + case BINARY: { + ByteBuffer data = varLengthData.get(index); + data.reset(); + int len = data.limit() - data.position(); + byte[] incremented = new byte[len + 1]; + System.arraycopy(data.array(), data.arrayOffset() + data.position(), incremented, 0, len); + addVarLengthData(index, incremented); + return true; + } + default: + throw new RuntimeException("unreachable"); + } + } + + /** + * Returns {@code true} if the upper row is equal to the incremented lower + * row. Neither row is modified. 
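+ *
+ * For example, with two int32 key columns, the pair (1, 5) and (1, 6) is considered
+ * incremented, while (1, 5) and (2, 6) is not.
+ *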
+ * @param lower the lower row + * @param upper the upper, possibly incremented, row + * @param indexes the columns in key order + * @return whether the upper row is equal to the incremented lower row + */ + static boolean isIncremented(PartialRow lower, PartialRow upper, List indexes) { + boolean equals = false; + ListIterator iter = indexes.listIterator(indexes.size()); + while (iter.hasPrevious()) { + int index = iter.previous(); + if (equals) { + if (isCellEqual(lower, upper, index)) { + continue; + } + return false; + } + + if (!lower.isSet(index) && !upper.isSet(index)) { + continue; + } + if (!isCellIncremented(lower, upper, index)) { + return false; + } + equals = true; + } + return equals; + } + + /** + * Checks if the specified cell is equal in both rows. + * @param a a row + * @param b a row + * @param index the column index + * @return {@code true} if the cell values for the given column are equal + */ + @SuppressWarnings("BigDecimalEquals") + private static boolean isCellEqual(PartialRow a, PartialRow b, int index) { + // These checks are perhaps overly restrictive, but right now we only use + // this method for checking fully-set keys. + Preconditions.checkArgument(a.getSchema().equals(b.getSchema())); + Preconditions.checkArgument(a.getSchema().getColumnByIndex(index).isKey()); + Preconditions.checkArgument(a.isSet(index)); + Preconditions.checkArgument(b.isSet(index)); + + ColumnSchema column = a.getSchema().getColumnByIndex(index); + Type type = column.getType(); + int offset = a.getSchema().getColumnOffset(index); + + switch (type) { + case BOOL: + case INT8: + return a.rowAlloc[offset] == b.rowAlloc[offset]; + case INT16: + return Bytes.getShort(a.rowAlloc, offset) == Bytes.getShort(b.rowAlloc, offset); + case DATE: + case INT32: + return Bytes.getInt(a.rowAlloc, offset) == Bytes.getInt(b.rowAlloc, offset); + case INT64: + case UNIXTIME_MICROS: + return Bytes.getLong(a.rowAlloc, offset) == Bytes.getLong(b.rowAlloc, offset); + case FLOAT: + return Bytes.getFloat(a.rowAlloc, offset) == Bytes.getFloat(b.rowAlloc, offset); + case DOUBLE: + return Bytes.getDouble(a.rowAlloc, offset) == Bytes.getDouble(b.rowAlloc, offset); + case DECIMAL: + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + int precision = typeAttributes.getPrecision(); + int scale = typeAttributes.getScale(); + return Bytes.getDecimal(a.rowAlloc, offset, precision, scale) + .equals(Bytes.getDecimal(b.rowAlloc, offset, precision, scale)); + case VARCHAR: + case STRING: + case BINARY: { + ByteBuffer dataA = a.varLengthData.get(index).duplicate(); + ByteBuffer dataB = b.varLengthData.get(index).duplicate(); + dataA.reset(); + dataB.reset(); + int lenA = dataA.limit() - dataA.position(); + int lenB = dataB.limit() - dataB.position(); + + if (lenA != lenB) { + return false; + } + for (int i = 0; i < lenA; i++) { + if (dataA.get(dataA.position() + i) != dataB.get(dataB.position() + i)) { + return false; + } + } + return true; + } + default: + throw new RuntimeException("unreachable"); + } + } + + /** + * Checks if the specified cell is in the upper row is an incremented version + * of the cell in the lower row. + * @param lower the lower row + * @param upper the possibly incremented upper row + * @param index the index of the column to check + * @return {@code true} if the column cell value in the upper row is equal to + * the value in the lower row, incremented by one. 
+ */ + @SuppressWarnings("BigDecimalEquals") + private static boolean isCellIncremented(PartialRow lower, PartialRow upper, int index) { + // These checks are perhaps overly restrictive, but right now we only use + // this method for checking fully-set keys. + Preconditions.checkArgument(lower.getSchema().equals(upper.getSchema())); + Preconditions.checkArgument(lower.getSchema().getColumnByIndex(index).isKey()); + Preconditions.checkArgument(lower.isSet(index)); + Preconditions.checkArgument(upper.isSet(index)); + + ColumnSchema column = lower.getSchema().getColumnByIndex(index); + Type type = column.getType(); + int offset = lower.getSchema().getColumnOffset(index); + + switch (type) { + case BOOL: + return lower.rowAlloc[offset] + 1 == upper.rowAlloc[offset]; + case INT8: { + byte val = lower.rowAlloc[offset]; + return val != Byte.MAX_VALUE && val + 1 == upper.rowAlloc[offset]; + } + case INT16: { + short val = Bytes.getShort(lower.rowAlloc, offset); + return val != Short.MAX_VALUE && val + 1 == Bytes.getShort(upper.rowAlloc, offset); + } + case INT32: { + int val = Bytes.getInt(lower.rowAlloc, offset); + return val != Integer.MAX_VALUE && val + 1 == Bytes.getInt(upper.rowAlloc, offset); + } + case DATE: { + int val = Bytes.getInt(lower.rowAlloc, offset); + return val != DateUtil.MAX_DATE_VALUE && val + 1 == Bytes.getInt(upper.rowAlloc, offset); + } + case INT64: + case UNIXTIME_MICROS: { + long val = Bytes.getLong(lower.rowAlloc, offset); + return val != Long.MAX_VALUE && val + 1 == Bytes.getLong(upper.rowAlloc, offset); + } + case FLOAT: { + float val = Bytes.getFloat(lower.rowAlloc, offset); + return val != Float.POSITIVE_INFINITY && + Math.nextAfter(val, Float.POSITIVE_INFINITY) == + Bytes.getFloat(upper.rowAlloc, offset); + } + case DOUBLE: { + double val = Bytes.getDouble(lower.rowAlloc, offset); + return val != Double.POSITIVE_INFINITY && + Math.nextAfter(val, Double.POSITIVE_INFINITY) == + Bytes.getDouble(upper.rowAlloc, offset); + } + case DECIMAL: { + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + int precision = typeAttributes.getPrecision(); + int scale = typeAttributes.getScale(); + BigDecimal val = Bytes.getDecimal(lower.rowAlloc, offset, precision, scale); + BigDecimal smallestVal = DecimalUtil.smallestValue(scale); + return val.add(smallestVal).equals( + Bytes.getDecimal(upper.rowAlloc, offset, precision, scale)); + } + case VARCHAR: + case STRING: + case BINARY: { + // Check that b is 1 byte bigger than a, the extra byte is 0, and the other bytes are equal. + ByteBuffer dataA = lower.varLengthData.get(index).duplicate(); + ByteBuffer dataB = upper.varLengthData.get(index).duplicate(); + dataA.reset(); + dataB.reset(); + int lenA = dataA.limit() - dataA.position(); + int lenB = dataB.limit() - dataB.position(); + + if (lenA == Integer.MAX_VALUE || + lenA + 1 != lenB || + dataB.get(dataB.limit() - 1) != 0) { + return false; + } + + for (int i = 0; i < lenA; i++) { + if (dataA.get(dataA.position() + i) != dataB.get(dataB.position() + i)) { + return false; + } + } + return true; + } + default: + throw new RuntimeException("unreachable"); + } + } + + /** + * Get the schema used for this row. + * @return a schema that came from KuduTable + */ + public Schema getSchema() { + return schema; + } + + /** + * Get the byte array that contains all the data added to this partial row. Variable length data + * is contained separately, see {@link #getVarLengthData()}. In their place you'll find their + * index in that list and their size. 
+ * @return a byte array containing the data for this row, except strings + */ + byte[] getRowAlloc() { + return rowAlloc; + } + + /** + * Get the bit set that indicates which columns were set. + * @return a bit set for columns with data + */ + BitSet getColumnsBitSet() { + return columnsBitSet; + } + + /** + * Get the bit set for the columns that were specifically set to null + * @return a bit set for null columns + */ + BitSet getNullsBitSet() { + return nullsBitSet; + } + + /** + * Prevents this PartialRow from being modified again. Can be called multiple times. + */ + void freeze() { + this.frozen = true; + } + + /** + * @return in memory size of this row. + *
+ * Note: the size here is not accurate, as we do not count all the fields, but it is + * enough for most scenarios. + */ + long size() { + long size = (long) rowAlloc.length + columnsBitSet.size() / Byte.SIZE; + if (nullsBitSet != null) { + size += nullsBitSet.size() / Byte.SIZE; + } + for (ByteBuffer bb : varLengthData) { + if (bb != null) { + size += bb.capacity(); + } + } + return size; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Partition.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Partition.java new file mode 100644 index 0000000000..7faaf8a91d --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Partition.java @@ -0,0 +1,320 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import com.google.common.base.Objects; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Schema; + +/** + * A Partition describes the set of rows that a Tablet is responsible for + * serving. Each tablet is assigned a single Partition.
+ * + * Partitions consist primarily of a start and end partition key. Every row with + * a partition key that falls in a Tablet's Partition will be served by that + * tablet.
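+ *
+ * A partition key is composed of one 4-byte encoded hash bucket per hash dimension,
+ * followed by the encoded range key; see {@code ENCODED_BUCKET_SIZE} and
+ * {@code rangeKey(byte[], int)} below.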
+ * + * In addition to the start and end partition keys, a Partition holds metadata + * to determine if a scan can prune, or skip, a partition based on the scan's + * start and end primary keys, and predicates. + * + * This class is new, and not considered stable or suitable for public use. + */ +@InterfaceAudience.LimitedPrivate("Impala") +@InterfaceStability.Unstable +public class Partition implements Comparable { + final byte[] partitionKeyStart; + final byte[] partitionKeyEnd; + + final byte[] rangeKeyStart; + final byte[] rangeKeyEnd; + + final List hashBuckets; + + /** + * Size of an encoded hash bucket component in a partition key. + */ + private static final int ENCODED_BUCKET_SIZE = 4; + + /** + * Creates a new partition with the provided start and end keys, and hash buckets. + * @param partitionKeyStart the start partition key + * @param partitionKeyEnd the end partition key + * @param hashBuckets the partition hash buckets + */ + public Partition(byte[] partitionKeyStart, + byte[] partitionKeyEnd, + List hashBuckets) { + this.partitionKeyStart = partitionKeyStart; + this.partitionKeyEnd = partitionKeyEnd; + this.hashBuckets = hashBuckets; + this.rangeKeyStart = rangeKey(partitionKeyStart, hashBuckets.size()); + this.rangeKeyEnd = rangeKey(partitionKeyEnd, hashBuckets.size()); + } + + /** + * Gets the start partition key. + * @return the start partition key + */ + public byte[] getPartitionKeyStart() { + return partitionKeyStart; + } + + /** + * Gets the end partition key. + * @return the end partition key + */ + public byte[] getPartitionKeyEnd() { + return partitionKeyEnd; + } + + /** + * Gets the start range key. + * @return the start range key + */ + public byte[] getRangeKeyStart() { + return rangeKeyStart; + } + + /** + * Gets the decoded start range key. + * @return the decoded start range key + */ + public PartialRow getDecodedRangeKeyStart(KuduTable table) { + Schema schema = table.getSchema(); + if (rangeKeyStart.length == 0) { + return schema.newPartialRow(); + } else { + PartitionSchema partitionSchema = table.getPartitionSchema(); + return KeyEncoder.decodeRangePartitionKey(schema, partitionSchema, rangeKeyStart); + } + } + + /** + * Gets the end range key. + * @return the end range key + */ + public byte[] getRangeKeyEnd() { + return rangeKeyEnd; + } + + /** + * Gets the decoded end range key. + * @return the decoded end range key + */ + public PartialRow getDecodedRangeKeyEnd(KuduTable table) { + Schema schema = table.getSchema(); + if (rangeKeyEnd.length == 0) { + return schema.newPartialRow(); + } else { + PartitionSchema partitionSchema = table.getPartitionSchema(); + return KeyEncoder.decodeRangePartitionKey(schema, partitionSchema, rangeKeyEnd); + } + } + + /** + * Gets the partition hash buckets. + * @return the partition hash buckets + */ + public List getHashBuckets() { + return hashBuckets; + } + + /** + * @return true if the partition is the absolute end partition + */ + public boolean isEndPartition() { + return partitionKeyEnd.length == 0; + } + + /** + * Equality only holds for partitions from the same table. Partition equality only takes into + * account the partition keys, since there is a 1 to 1 correspondence between partition keys and + * the hash buckets and range keys. 
+ * + * @return the hash code + */ + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof Partition)) { + return false; + } + Partition partition = (Partition) o; + return Arrays.equals(partitionKeyStart, partition.partitionKeyStart) && + Arrays.equals(partitionKeyEnd, partition.partitionKeyEnd); + } + + /** + * The hash code only takes into account the partition keys, since there is a 1 to 1 + * correspondence between partition keys and the hash buckets and range keys. + * + * @return the hash code + */ + @Override + public int hashCode() { + return Objects.hashCode(Arrays.hashCode(partitionKeyStart), Arrays.hashCode(partitionKeyEnd)); + } + + /** + * Partition comparison is only reasonable when comparing partitions from the same table, and + * since Kudu does not yet allow partition splitting, no two distinct partitions can have the + * same start partition key. Accordingly, partitions are compared strictly by the start partition + * key. + * + * @param other the other partition of the same table + * @return the comparison of the partitions + */ + @Override + public int compareTo(Partition other) { + return Bytes.memcmp(this.partitionKeyStart, other.partitionKeyStart); + } + + /** + * Returns the range key portion of a partition key given the number of buckets in the partition + * schema. + * @param partitionKey the partition key containing the range key + * @param numHashBuckets the number of hash bucket components of the table + * @return the range key + */ + private static byte[] rangeKey(byte[] partitionKey, int numHashBuckets) { + int bucketsLen = numHashBuckets * ENCODED_BUCKET_SIZE; + if (partitionKey.length > bucketsLen) { + return Arrays.copyOfRange(partitionKey, bucketsLen, partitionKey.length); + } else { + return AsyncKuduClient.EMPTY_ARRAY; + } + } + + @Override + public String toString() { + return String.format("[%s, %s)", + partitionKeyStart.length == 0 ? "" : Bytes.hex(partitionKeyStart), + partitionKeyEnd.length == 0 ? "" : Bytes.hex(partitionKeyEnd)); + } + + /** + * Formats the range partition into a string suitable for debug printing. 
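+ *
+ * Typical output looks like {@code VALUES < 100}, {@code 100 <= VALUES < 200},
+ * {@code VALUE = 42}, or {@code UNBOUNDED} (the values are illustrative); when
+ * {@code showHashInfo} is set, hash schema info such as {@code HASH(a) PARTITIONS 2}
+ * is appended.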
+ * + * @param table that this partition belongs to + * @param showHashInfo whether to output hash schema info per range + * @return a string containing a formatted representation of the range partition + */ + String formatRangePartition(KuduTable table, boolean showHashInfo) { + Schema schema = table.getSchema(); + PartitionSchema partitionSchema = table.getPartitionSchema(); + PartitionSchema.RangeSchema rangeSchema = partitionSchema.getRangeSchema(); + + if (rangeSchema.getColumnIds().isEmpty()) { + return ""; + } + if (rangeKeyStart.length == 0 && rangeKeyEnd.length == 0) { + return "UNBOUNDED"; + } + + List idxs = new ArrayList<>(); + for (int id : partitionSchema.getRangeSchema().getColumnIds()) { + idxs.add(schema.getColumnIndex(id)); + } + + int numColumns = rangeSchema.getColumnIds().size(); + StringBuilder sb = new StringBuilder(); + + if (rangeKeyEnd.length == 0) { + sb.append("VALUES >= "); + if (numColumns > 1) { + sb.append('('); + } + KeyEncoder.decodeRangePartitionKey(schema, partitionSchema, rangeKeyStart) + .appendShortDebugString(idxs, sb); + if (numColumns > 1) { + sb.append(')'); + } + } else if (rangeKeyStart.length == 0) { + sb.append("VALUES < "); + if (numColumns > 1) { + sb.append('('); + } + KeyEncoder.decodeRangePartitionKey(schema, partitionSchema, rangeKeyEnd) + .appendShortDebugString(idxs, sb); + if (numColumns > 1) { + sb.append(')'); + } + } else { + PartialRow lowerBound = + KeyEncoder.decodeRangePartitionKey(schema, partitionSchema, rangeKeyStart); + PartialRow upperBound = + KeyEncoder.decodeRangePartitionKey(schema, partitionSchema, rangeKeyEnd); + + if (PartialRow.isIncremented(lowerBound, upperBound, idxs)) { + sb.append("VALUE = "); + if (numColumns > 1) { + sb.append('('); + } + lowerBound.appendShortDebugString(idxs, sb); + if (numColumns > 1) { + sb.append(')'); + } + } else { + if (numColumns > 1) { + sb.append('('); + } + lowerBound.appendShortDebugString(idxs, sb); + if (numColumns > 1) { + sb.append(')'); + } + sb.append(" <= VALUES < "); + if (numColumns > 1) { + sb.append('('); + } + upperBound.appendShortDebugString(idxs, sb); + if (numColumns > 1) { + sb.append(')'); + } + } + } + + if (showHashInfo) { + List hashSchema = + partitionSchema.getHashSchemaForRange(rangeKeyStart); + for (PartitionSchema.HashBucketSchema hashDimension : hashSchema) { + sb.append(" HASH("); + boolean firstId = true; + for (Integer id : hashDimension.getColumnIds()) { + if (firstId) { + firstId = false; + } else { + sb.append(','); + } + sb.append(schema.getColumnByIndex(schema.getColumnIndex(id)).getName()); + } + sb.append(") PARTITIONS "); + sb.append(hashDimension.getNumBuckets()); + } + } + return sb.toString(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartitionPruner.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartitionPruner.java new file mode 100644 index 0000000000..f0a8e76490 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartitionPruner.java @@ -0,0 +1,739 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.Deque; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import javax.annotation.concurrent.NotThreadSafe; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.util.ByteVec; +import org.apache.kudu.util.Pair; + +@InterfaceAudience.Private +@NotThreadSafe +public class PartitionPruner { + + private final Deque> rangePartitions; + + /** + * Constructs a new partition pruner. + * @param rangePartitions the valid partition key ranges, sorted in ascending order + */ + private PartitionPruner(Deque> rangePartitions) { + this.rangePartitions = rangePartitions; + } + + /** + * @return the number of remaining partition ranges for the scan + */ + public int numRangesRemainingForTests() { + return rangePartitions.size(); + } + + /** + * @return a partition pruner that will prune all partitions + */ + private static PartitionPruner empty() { + return new PartitionPruner(new ArrayDeque>()); + } + + /** + * Creates a new partition pruner for the provided scan. + * @param scanner the scan to prune + * @return a partition pruner + */ + public static PartitionPruner create(AbstractKuduScannerBuilder scanner) { + Schema schema = scanner.table.getSchema(); + final PartitionSchema partitionSchema = scanner.table.getPartitionSchema(); + PartitionSchema.RangeSchema rangeSchema = partitionSchema.getRangeSchema(); + Map predicates = scanner.predicates; + + // Check if the scan can be short-circuited entirely by checking the primary + // key bounds and predicates. This also allows us to assume some invariants of the + // scan, such as no None predicates and that the lower bound PK < upper + // bound PK. + if (scanner.upperBoundPrimaryKey.length > 0 && + Bytes.memcmp(scanner.lowerBoundPrimaryKey, scanner.upperBoundPrimaryKey) >= 0) { + return PartitionPruner.empty(); + } + for (KuduPredicate predicate : predicates.values()) { + if (predicate.getType() == KuduPredicate.PredicateType.NONE) { + return PartitionPruner.empty(); + } + } + + // Build a set of partition key ranges which cover the tablets necessary for + // the scan. + // + // Example predicate sets and resulting partition key ranges, based on the + // following tablet schema: + // + // CREATE TABLE t (a INT32, b INT32, c INT32) PRIMARY KEY (a, b, c) + // DISTRIBUTE BY RANGE (c) + // HASH (a) INTO 2 BUCKETS + // HASH (b) INTO 3 BUCKETS; + // + // Assume that hash(0) = 0 and hash(2) = 2. 
+ // + // | Predicates | Partition Key Ranges | + // +------------+--------------------------------------------------------+ + // | a = 0 | [(bucket=0, bucket=2, c=0), (bucket=0, bucket=2, c=1)) | + // | b = 2 | | + // | c = 0 | | + // +------------+--------------------------------------------------------+ + // | a = 0 | [(bucket=0, bucket=2), (bucket=0, bucket=3)) | + // | b = 2 | | + // +------------+--------------------------------------------------------+ + // | a = 0 | [(bucket=0, bucket=0, c=0), (bucket=0, bucket=0, c=1)) | + // | c = 0 | [(bucket=0, bucket=1, c=0), (bucket=0, bucket=1, c=1)) | + // | | [(bucket=0, bucket=2, c=0), (bucket=0, bucket=2, c=1)) | + // +------------+--------------------------------------------------------+ + // | b = 2 | [(bucket=0, bucket=2, c=0), (bucket=0, bucket=2, c=1)) | + // | c = 0 | [(bucket=1, bucket=2, c=0), (bucket=1, bucket=2, c=1)) | + // +------------+--------------------------------------------------------+ + // | a = 0 | [(bucket=0), (bucket=1)) | + // +------------+--------------------------------------------------------+ + // | b = 2 | [(bucket=0, bucket=2), (bucket=0, bucket=3)) | + // | | [(bucket=1, bucket=2), (bucket=1, bucket=3)) | + // +------------+--------------------------------------------------------+ + // | c = 0 | [(bucket=0, bucket=0, c=0), (bucket=0, bucket=0, c=1)) | + // | | [(bucket=0, bucket=1, c=0), (bucket=0, bucket=1, c=1)) | + // | | [(bucket=0, bucket=2, c=0), (bucket=0, bucket=2, c=1)) | + // | | [(bucket=1, bucket=0, c=0), (bucket=1, bucket=0, c=1)) | + // | | [(bucket=1, bucket=1, c=0), (bucket=1, bucket=1, c=1)) | + // | | [(bucket=1, bucket=2, c=0), (bucket=1, bucket=2, c=1)) | + // +------------+--------------------------------------------------------+ + // | None | [(), ()) | + // + // If the partition key is considered as a sequence of the hash bucket + // components and a range component, then a few patterns emerge from the + // examples above: + // + // 1) The partition keys are truncated after the final constrained component + // Hash bucket components are constrained when the scan is limited to a + // subset of buckets via equality or in-list predicates on that component. + // Range components are constrained if they have an upper or lower bound + // via range or equality predicates on that component. + // + // 2) If the final constrained component is a hash bucket, then the + // corresponding bucket in the upper bound is incremented in order to make + // it an exclusive key. + // + // 3) The number of partition key ranges in the result is equal to the product + // of the number of buckets of each unconstrained hash component which come + // before a final constrained component. If there are no unconstrained hash + // components, then the number of resulting partition key ranges is one. Note + // that this can be a lot of ranges, and we may find we need to limit the + // algorithm to give up on pruning if the number of ranges exceeds a limit. + // Until this becomes a problem in practice, we'll continue always pruning, + // since it is precisely these highly-hash-partitioned tables which get the + // most benefit from pruning. + + // Step 1: Build the range portion of the partition key. If the range partition + // columns match the primary key columns, then we can substitute the primary + // key bounds, if they are tighter. 
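+    // For example, with a single range column c and the predicate 10 <= c < 20, the
+    // lower bound below encodes the range key for c = 10 and the upper bound encodes
+    // the (exclusive) range key for c = 20.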
+ byte[] rangeLowerBound = pushPredsIntoLowerBoundRangeKey(schema, rangeSchema, predicates); + byte[] rangeUpperBound = pushPredsIntoUpperBoundRangeKey(schema, rangeSchema, predicates); + + if (partitionSchema.isSimpleRangePartitioning()) { + if (Bytes.memcmp(rangeLowerBound, scanner.lowerBoundPrimaryKey) < 0) { + rangeLowerBound = scanner.lowerBoundPrimaryKey; + } + if (scanner.upperBoundPrimaryKey.length > 0 && + (rangeUpperBound.length == 0 || + Bytes.memcmp(rangeUpperBound, scanner.upperBoundPrimaryKey) > 0)) { + rangeUpperBound = scanner.upperBoundPrimaryKey; + } + } + // Since the table can contain range-specific hash schemas, it's necessary + // to split the original range into sub-ranges where each subrange comes + // with appropriate hash schema. + List preliminaryRanges = + splitIntoHashSpecificRanges(rangeLowerBound, rangeUpperBound, partitionSchema); + + List> partitionKeyRangeBytes = new ArrayList<>(); + + for (PartitionSchema.EncodedRangeBoundsWithHashSchema preliminaryRange : preliminaryRanges) { + // Step 2: Create the hash bucket portion of the partition key. + final List hashBucketSchemas = + preliminaryRange.hashSchemas; + // List of pruned hash buckets per hash component. + List hashComponents = new ArrayList<>(hashBucketSchemas.size()); + for (PartitionSchema.HashBucketSchema hashSchema : hashBucketSchemas) { + hashComponents.add(pruneHashComponent(schema, hashSchema, predicates)); + } + + // The index of the final constrained component in the partition key. + int constrainedIndex = 0; + if (preliminaryRange.lower.length > 0 || preliminaryRange.upper.length > 0) { + // The range component is constrained if either of the range bounds are + // specified (non-empty). + constrainedIndex = hashBucketSchemas.size(); + } else { + // Search the hash bucket constraints from right to left, looking for the + // first constrained component. + for (int i = hashComponents.size(); i > 0; i--) { + int numBuckets = hashBucketSchemas.get(i - 1).getNumBuckets(); + BitSet hashBuckets = hashComponents.get(i - 1); + if (hashBuckets.nextClearBit(0) < numBuckets) { + constrainedIndex = i; + break; + } + } + } + + // Build up a set of partition key ranges out of the hash components. + // + // Each hash component simply appends its bucket number to the + // partition key ranges (possibly incrementing the upper bound by one bucket + // number if this is the final constraint, see note 2 in the example above). + List> partitionKeyRanges = new ArrayList<>(); + partitionKeyRanges.add(new Pair<>(ByteVec.create(), ByteVec.create())); + + for (int hashIdx = 0; hashIdx < constrainedIndex; hashIdx++) { + // This is the final partition key component if this is the final constrained + // bucket, and the range upper bound is empty. In this case we need to + // increment the bucket on the upper bound to convert from inclusive to + // exclusive. + boolean isLast = hashIdx + 1 == constrainedIndex && preliminaryRange.upper.length == 0; + BitSet hashBuckets = hashComponents.get(hashIdx); + + List> newPartitionKeyRanges = + new ArrayList<>(partitionKeyRanges.size() * hashBuckets.cardinality()); + for (Pair partitionKeyRange : partitionKeyRanges) { + for (int bucket = hashBuckets.nextSetBit(0); + bucket != -1; + bucket = hashBuckets.nextSetBit(bucket + 1)) { + int bucketUpper = isLast ? 
bucket + 1 : bucket; + ByteVec lower = partitionKeyRange.getFirst().clone(); + ByteVec upper = partitionKeyRange.getFirst().clone(); + KeyEncoder.encodeHashBucket(bucket, lower); + KeyEncoder.encodeHashBucket(bucketUpper, upper); + newPartitionKeyRanges.add(new Pair<>(lower, upper)); + } + } + partitionKeyRanges = newPartitionKeyRanges; + } + + // Step 3: append the (possibly empty) range bounds to the partition key ranges. + for (Pair range : partitionKeyRanges) { + range.getFirst().append(preliminaryRange.lower); + range.getSecond().append(preliminaryRange.upper); + } + + // Step 4: Filter ranges that fall outside the scan's upper and lower bound partition keys. + for (Pair range : partitionKeyRanges) { + byte[] lower = range.getFirst().toArray(); + byte[] upper = range.getSecond().toArray(); + + // Sanity check that the lower bound is less than the upper bound. + assert upper.length == 0 || Bytes.memcmp(lower, upper) < 0; + + // Find the intersection of the ranges. + if (scanner.lowerBoundPartitionKey.length > 0 && + (lower.length == 0 || Bytes.memcmp(lower, scanner.lowerBoundPartitionKey) < 0)) { + lower = scanner.lowerBoundPartitionKey; + } + if (scanner.upperBoundPartitionKey.length > 0 && + (upper.length == 0 || Bytes.memcmp(upper, scanner.upperBoundPartitionKey) > 0)) { + upper = scanner.upperBoundPartitionKey; + } + + // If the intersection is valid, then add it as a range partition. + if (upper.length == 0 || Bytes.memcmp(lower, upper) < 0) { + partitionKeyRangeBytes.add(new Pair<>(lower, upper)); + } + } + } + + // The PartitionPruner's constructor expects the collection to be sorted + // in ascending order. + Collections.sort(partitionKeyRangeBytes, + (lhs, rhs) -> Bytes.memcmp(lhs.getFirst(), rhs.getFirst())); + return new PartitionPruner(new ArrayDeque<>(partitionKeyRangeBytes)); + } + + /** @return {@code true} if there are more range partitions to scan. */ + public boolean hasMorePartitionKeyRanges() { + return !rangePartitions.isEmpty(); + } + + /** @return the inclusive lower bound partition key of the next tablet to scan. */ + public byte[] nextPartitionKey() { + return rangePartitions.getFirst().getFirst(); + } + + /** @return the next range partition key range to scan. */ + public Pair nextPartitionKeyRange() { + return rangePartitions.getFirst(); + } + + /** Removes all partition key ranges through the provided exclusive upper bound. */ + public void removePartitionKeyRange(byte[] upperBound) { + if (upperBound.length == 0) { + rangePartitions.clear(); + return; + } + + while (!rangePartitions.isEmpty()) { + Pair range = rangePartitions.getFirst(); + if (Bytes.memcmp(upperBound, range.getFirst()) <= 0) { + break; + } + rangePartitions.removeFirst(); + if (range.getSecond().length == 0 || Bytes.memcmp(upperBound, range.getSecond()) < 0) { + // The upper bound falls in the middle of this range, so add it back + // with the restricted bounds. + rangePartitions.addFirst(new Pair<>(upperBound, range.getSecond())); + break; + } + } + } + + /** + * @param partition to prune + * @return {@code true} if the partition should be pruned + */ + boolean shouldPruneForTests(Partition partition) { + // The C++ version uses binary search to do this with fewer key comparisons, + // but the algorithm isn't easily translatable, so this just uses a linear + // search. + for (Pair range : rangePartitions) { + + // Continue searching the list of ranges if the partition is greater than + // the current range. 
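+      // (An empty range end means the range extends to the end of the keyspace, so the
+      // partition cannot sort entirely after it.)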
+ if (range.getSecond().length > 0 && + Bytes.memcmp(range.getSecond(), partition.getPartitionKeyStart()) <= 0) { + continue; + } + + // If the current range is greater than the partitions, + // then the partition should be pruned. + return partition.getPartitionKeyEnd().length > 0 && + Bytes.memcmp(partition.getPartitionKeyEnd(), range.getFirst()) <= 0; + } + + // The partition is greater than all ranges. + return true; + } + + static List idsToIndexesForTest(Schema schema, List ids) { + return idsToIndexes(schema, ids); + } + + private static List idsToIndexes(Schema schema, List ids) { + List indexes = new ArrayList<>(ids.size()); + for (int id : ids) { + indexes.add(schema.getColumnIndex(id)); + } + return indexes; + } + + private static boolean incrementKey(PartialRow row, List keyIndexes) { + for (int i = keyIndexes.size() - 1; i >= 0; i--) { + if (row.incrementColumn(keyIndexes.get(i))) { + return true; + } + } + return false; + } + + /** + * Translates column predicates into a lower bound range partition key. + * @param schema the table schema + * @param rangeSchema the range partition schema + * @param predicates the predicates + * @return a lower bound range partition key + */ + private static byte[] pushPredsIntoLowerBoundRangeKey(Schema schema, + PartitionSchema.RangeSchema rangeSchema, + Map predicates) { + PartialRow row = schema.newPartialRow(); + int pushedPredicates = 0; + + List rangePartitionColumnIdxs = idsToIndexes(schema, rangeSchema.getColumnIds()); + + // Copy predicates into the row in range partition key column order, + // stopping after the first missing predicate. + loop: for (int idx : rangePartitionColumnIdxs) { + ColumnSchema column = schema.getColumnByIndex(idx); + KuduPredicate predicate = predicates.get(column.getName()); + if (predicate == null) { + break; + } + + switch (predicate.getType()) { + case RANGE: + if (predicate.getLower() == null) { + break loop; + } + // fall through + case EQUALITY: + row.setRaw(idx, predicate.getLower()); + pushedPredicates++; + break; + case IS_NOT_NULL: + break loop; + case IN_LIST: + row.setRaw(idx, predicate.getInListValues()[0]); + pushedPredicates++; + break; + default: + throw new IllegalArgumentException( + String.format("unexpected predicate type can not be pushed into key: %s", predicate)); + } + } + + // If no predicates were pushed, no need to do any more work. + if (pushedPredicates == 0) { + return AsyncKuduClient.EMPTY_ARRAY; + } + + // For each remaining column in the partition key, fill it with the minimum value. + Iterator remainingIdxs = rangePartitionColumnIdxs.listIterator(pushedPredicates); + while (remainingIdxs.hasNext()) { + row.setMin(remainingIdxs.next()); + } + + return KeyEncoder.encodeRangePartitionKey(row, rangeSchema); + } + + /** + * Translates column predicates into an upper bound range partition key. + * @param schema the table schema + * @param rangeSchema the range partition schema + * @param predicates the predicates + * @return an upper bound range partition key + */ + private static byte[] pushPredsIntoUpperBoundRangeKey(Schema schema, + PartitionSchema.RangeSchema rangeSchema, + Map predicates) { + PartialRow row = schema.newPartialRow(); + int pushedPredicates = 0; + KuduPredicate finalPredicate = null; + + List rangePartitionColumnIdxs = idsToIndexes(schema, rangeSchema.getColumnIds()); + + // Step 1: copy predicates into the row in range partition key column order, stopping after + // the first missing predicate. 
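+    // A gap in this sequence ends the push-down: the range key is a prefix encoding, so a
+    // predicate on a later column cannot tighten the bound unless all earlier columns are
+    // constrained as well.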
+ loop: for (int idx : rangePartitionColumnIdxs) { + ColumnSchema column = schema.getColumnByIndex(idx); + KuduPredicate predicate = predicates.get(column.getName()); + if (predicate == null) { + break; + } + + switch (predicate.getType()) { + case EQUALITY: + row.setRaw(idx, predicate.getLower()); + pushedPredicates++; + finalPredicate = predicate; + break; + case RANGE: + if (predicate.getUpper() != null) { + row.setRaw(idx, predicate.getUpper()); + pushedPredicates++; + finalPredicate = predicate; + } + + // After the first column with a range constraint we stop pushing + // constraints into the upper bound. Instead, we push minimum values + // to the remaining columns (below), which is the maximally tight + // constraint. + break loop; + case IS_NOT_NULL: + break loop; + case IN_LIST: { + byte[][] values = predicate.getInListValues(); + row.setRaw(idx, values[values.length - 1]); + pushedPredicates++; + finalPredicate = predicate; + break; + } + default: + throw new IllegalArgumentException( + String.format("unexpected predicate type can not be pushed into key: %s", predicate)); + } + } + + // If no predicates were pushed, no need to do any more work. + if (pushedPredicates == 0) { + return AsyncKuduClient.EMPTY_ARRAY; + } + + // Step 2: If the final predicate is an equality or IN-list predicate, increment the + // key to convert it to an exclusive upper bound. + if (finalPredicate.getType() == KuduPredicate.PredicateType.EQUALITY || + finalPredicate.getType() == KuduPredicate.PredicateType.IN_LIST) { + // If the increment fails then this bound is is not constraining the keyspace. + if (!incrementKey(row, rangePartitionColumnIdxs.subList(0, pushedPredicates))) { + return AsyncKuduClient.EMPTY_ARRAY; + } + } + + // Step 3: Fill the remaining columns without predicates with the min value. + Iterator remainingIdxs = rangePartitionColumnIdxs.listIterator(pushedPredicates); + while (remainingIdxs.hasNext()) { + row.setMin(remainingIdxs.next()); + } + + return KeyEncoder.encodeRangePartitionKey(row, rangeSchema); + } + + static List splitIntoHashSpecificRanges( + byte[] scanLowerBound, byte[] scanUpperBound, PartitionSchema ps) { + final List ranges = + ps.getEncodedRangesWithHashSchemas(); + final List tableWideHashSchema = + ps.getHashBucketSchemas(); + + // If there aren't any ranges with custom hash schemas or there isn't an + // intersection between the set of ranges with custom hash schemas and the + // scan range, the result is trivial: the whole scan range is attributed + // to the table-wide hash schema. + if (ranges.isEmpty()) { + return ImmutableList.of(new PartitionSchema.EncodedRangeBoundsWithHashSchema( + scanLowerBound, scanUpperBound, tableWideHashSchema)); + } + + { + final byte[] rangesLowerBound = ranges.get(0).lower; + final byte[] rangesUpperBound = ranges.get(ranges.size() - 1).upper; + + if ((scanUpperBound.length != 0 && + Bytes.memcmp(scanUpperBound, rangesLowerBound) <= 0) || + (scanLowerBound.length != 0 && rangesUpperBound.length != 0 && + Bytes.memcmp(rangesUpperBound, scanLowerBound) <= 0)) { + return ImmutableList.of(new PartitionSchema.EncodedRangeBoundsWithHashSchema( + scanLowerBound, scanUpperBound, tableWideHashSchema)); + } + } + + // Index of the known range with custom hash schema that the iterator is + // currently pointing at or about to point if the iterator is currently + // at the scan boundary. + int curIdx = -1; + + // Find the first range that is at or after the specified bounds. 
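+    // For example, with scan bounds [a, z) and a single custom-hash range [f, m), the
+    // search below lands on that range and the walk that follows yields [a, f) and
+    // [m, z) with the table-wide hash schema plus [f, m) with its custom hash schema.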
+ // TODO(aserbin): maybe, do this in PartitionSchema with O(ln(N)) complexity? + for (int idx = 0; idx < ranges.size(); ++idx) { + final PartitionSchema.EncodedRangeBoundsWithHashSchema range = ranges.get(idx); + + // Searching for the first range that is at or after the lower scan bound. + if (curIdx >= 0 || + (range.upper.length != 0 && Bytes.memcmp(range.upper, scanLowerBound) <= 0)) { + continue; + } + curIdx = idx; + } + + Preconditions.checkState(curIdx >= 0); + Preconditions.checkState(curIdx < ranges.size()); + + // Current position of the iterator. + byte[] curPoint = scanLowerBound; + + // Iterate over the scan range from one known boundary to the next one, + // enumerating the resulting consecutive sub-ranges and attributing each + // sub-range to a proper hash schema. If that's a known range with custom hash + // schema, it's attributed to its range-specific hash schema; otherwise, + // a sub-range is attributed to the table-wide hash schema. + List result = new ArrayList<>(); + while (curIdx < ranges.size() && + (Bytes.memcmp(curPoint, scanUpperBound) < 0 || scanUpperBound.length == 0)) { + // Check the disposition of cur_point related to the lower boundary + // of the range pointed to by 'cur_idx'. + final PartitionSchema.EncodedRangeBoundsWithHashSchema curRange = ranges.get(curIdx); + if (Bytes.memcmp(curPoint, curRange.lower) < 0) { + // The iterator is before the current range: + // |---| + // ^ + // The next known bound is either the upper bound of the current range + // or the upper bound of the scan. + byte[] upperBound; + if (scanUpperBound.length == 0) { + upperBound = curRange.lower; + } else { + if (Bytes.memcmp(curRange.lower, scanUpperBound) < 0) { + upperBound = curRange.lower; + } else { + upperBound = scanUpperBound; + } + } + result.add(new PartitionSchema.EncodedRangeBoundsWithHashSchema( + curPoint, upperBound, tableWideHashSchema)); + // Not advancing the 'cur_idx' since cur_point is either at the beginning + // of the range or before it at the upper bound of the scan. + } else if (Bytes.memcmp(curPoint, curRange.lower) == 0) { + // The iterator is at the lower boundary of the current range: + // |---| + // ^ + if ((curRange.upper.length != 0 && Bytes.memcmp(curRange.upper, scanUpperBound) <= 0) || + scanUpperBound.length == 0) { + // The current range is withing the scan boundaries. + result.add(curRange); + } else { + // The current range spans over the upper bound of the scan. + result.add(new PartitionSchema.EncodedRangeBoundsWithHashSchema( + curPoint, scanUpperBound, curRange.hashSchemas)); + } + // Done with the current range, advance to the next one, if any. + ++curIdx; + } else { + if ((scanUpperBound.length != 0 && Bytes.memcmp(scanUpperBound, curRange.upper) <= 0) || + curRange.upper.length == 0) { + result.add(new PartitionSchema.EncodedRangeBoundsWithHashSchema( + curPoint, scanUpperBound, curRange.hashSchemas)); + } else { + result.add(new PartitionSchema.EncodedRangeBoundsWithHashSchema( + curPoint, curRange.upper, curRange.hashSchemas)); + } + // Done with the current range, advance to the next one, if any. + ++curIdx; + } + Preconditions.checkState(!result.isEmpty()); + // Advance the iterator. + curPoint = result.get(result.size() - 1).upper; + } + + // If exiting from the cycle above by the 'cur_idx < ranges.size()' condition, + // check if the upper bound of the scan is beyond the upper bound of the last + // range with custom hash schema. 
If so, add an extra range that spans from + // the upper bound of the last range to the upper bound of the scan. + Preconditions.checkState(!result.isEmpty()); + final byte[] rangesUpperBound = result.get(result.size() - 1).upper; + if (Bytes.memcmp(rangesUpperBound, scanUpperBound) != 0) { + Preconditions.checkState(Bytes.memcmp(curPoint, rangesUpperBound) == 0); + result.add(new PartitionSchema.EncodedRangeBoundsWithHashSchema( + curPoint, scanUpperBound, tableWideHashSchema)); + } + + return result; + } + + // Just for test. + static BitSet pruneHashComponentV2ForTest(Schema schema, + PartitionSchema.HashBucketSchema hashSchema, + Map predicates) { + return pruneHashComponent(schema, hashSchema, predicates); + } + + /** + * Search all combinations of in-list and equality predicates for prunable hash partitions. + * The method is an optimized version of 'TestPartitionPruner::pruneHashComponent' + * space-complexity wise. + * @return a bitset containing {@code false} bits for hash buckets which may be pruned + */ + private static BitSet pruneHashComponent(Schema schema, + PartitionSchema.HashBucketSchema hashSchema, + Map predicates) { + BitSet hashBuckets = new BitSet(hashSchema.getNumBuckets()); + List columnIdxs = idsToIndexes(schema, hashSchema.getColumnIds()); + List> predicateValueList = new ArrayList<>(); + for (int idx : columnIdxs) { + ColumnSchema column = schema.getColumnByIndex(idx); + KuduPredicate predicate = predicates.get(column.getName()); + if (predicate == null || + (predicate.getType() != KuduPredicate.PredicateType.EQUALITY && + predicate.getType() != KuduPredicate.PredicateType.IN_LIST)) { + hashBuckets.set(0, hashSchema.getNumBuckets()); + return hashBuckets; + } + + List predicateValues; + if (predicate.getType() == KuduPredicate.PredicateType.EQUALITY) { + predicateValues = Collections.singletonList(predicate.getLower()); + } else { + predicateValues = Arrays.asList(predicate.getInListValues()); + } + predicateValueList.add(predicateValues); + } + List valuesCombination = new ArrayList<>(); + computeHashBuckets(schema, hashSchema, hashBuckets, + columnIdxs, predicateValueList, valuesCombination); + return hashBuckets; + } + + /** + * pick all combinations and compute their hashes. + * @param schema the table schema + * @param hashSchema the hash partition schema. + * @param hashBuckets the result of this algorithm, a bit 0 means a partition can be pruned + * @param columnIdxs column indexes of columns in the hash partition schema + * @param predicateValueList values in in-list predicates of these columns + * @param valuesCombination a combination of in-list and equality predicates + */ + private static void computeHashBuckets(Schema schema, + PartitionSchema.HashBucketSchema hashSchema, + BitSet hashBuckets, + List columnIdxs, + List> predicateValueList, + List valuesCombination) { + if (hashBuckets.cardinality() == hashSchema.getNumBuckets()) { + return; + } + int level = valuesCombination.size(); + if (level == columnIdxs.size()) { + // This 'valuesCombination' is a picked combination value for computing hash bucket. + // + // The algorithm is an argorithm like DFS, which pick value for every column in + // 'predicateValueList', 'valuesCombination' is the picked values. + // + // The valuesCombination is a value list picked by followings algorithm: + // 1. pick a value from predicateValueList[0] for the column who columnIdxs[0] + // stand for. Every value in predicateValueList[0] can be picked. + // The count of pick method is predicateValueList[0].size(). 
+ // 2. pick a value from predicateValueList[1] for the column who columnIdxs[1] + // stand for. + // The count of pick method is predicateValueList[1].size(). + // 3. Do this like step 1,2 until the last one column' value picked in + // 'predicateValueList[columnIdx.size()-1]' columnIdx[columnIdx.size()-1] stand for. + // + // The algorithm ends when all combinations has been searched. + // 'valuesCombination' saves a combination values of in-list predicates. + // So we use the 'valuesCombination' to construct a row, then compute its hash bucket. + PartialRow row = schema.newPartialRow(); + for (int i = 0; i < valuesCombination.size(); i++) { + row.setRaw(columnIdxs.get(i), valuesCombination.get(i)); + } + int hash = KeyEncoder.getHashBucket(row, hashSchema); + hashBuckets.set(hash); + return; + } + for (int i = 0; i < predicateValueList.get(level).size(); i++) { + valuesCombination.add(predicateValueList.get(level).get(i)); + computeHashBuckets(schema, hashSchema, hashBuckets, + columnIdxs, predicateValueList, valuesCombination); + valuesCombination.remove(valuesCombination.size() - 1); + } + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartitionSchema.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartitionSchema.java new file mode 100644 index 0000000000..50851a4ae7 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PartitionSchema.java @@ -0,0 +1,312 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.TreeSet; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.primitives.UnsignedBytes; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Schema; + +/** + * A partition schema describes how the rows of a table are distributed among + * tablets. + * + * Primarily, a table's partition schema is responsible for translating the + * primary key column values of a row into a partition key that can be used to + * find the tablet containing the key. + * + * In case of table-wide hash partitioning, the partition schema is made up of + * zero or more hash bucket components, followed by a single range component. + * In case of custom hash bucketing per range, the partition schema contains + * information on hash bucket components per range. 
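+ *
+ * In either case, a row's encoded partition key is the concatenation of its hash bucket
+ * numbers (one per hash component in effect for the row's range) followed by its encoded
+ * range key.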
+ * + * Each hash bucket component includes one or more columns from the primary key + * column set, with the restriction that an individual primary key column may + * only be included in a single hash component. + * + * This class is new, and not considered stable or suitable for public use. + */ +@InterfaceAudience.LimitedPrivate("Impala") +@InterfaceStability.Unstable +public class PartitionSchema { + + private static final class BoundsComparator + implements Comparator, Serializable { + private static final long serialVersionUID = 36028797018963969L; + private static final Comparator comparator = + UnsignedBytes.lexicographicalComparator(); + + @Override + public int compare(EncodedRangeBoundsWithHashSchema lhs, + EncodedRangeBoundsWithHashSchema rhs) { + return comparator.compare(lhs.lower, rhs.lower); + } + } + + private static final Comparator COMPARATOR = + new BoundsComparator(); + + private final RangeSchema rangeSchema; + private final List hashBucketSchemas; + private final List rangesWithHashSchemas; + private final List encodedRangesWithHashSchemas; + private TreeSet hashSchemasPerRange; + private final boolean isSimple; + + static class EncodedRangeBoundsWithHashSchema { + final byte[] lower; + final byte[] upper; + final List hashSchemas; + + public EncodedRangeBoundsWithHashSchema( + byte[] lower, + byte[] upper, + List hashSchemas) { + Preconditions.checkNotNull(lower); + Preconditions.checkNotNull(upper); + Preconditions.checkState(upper.length == 0 || Bytes.memcmp(lower, upper) < 0); + this.lower = lower; + this.upper = upper; + this.hashSchemas = hashSchemas; + } + } + + /** + * Creates a new partition schema from the range and hash bucket schemas. + * + * @param rangeSchema the range schema + * @param hashBucketSchemas the table-wide hash schema + * @param schema the table schema + */ + public PartitionSchema(RangeSchema rangeSchema, + List hashBucketSchemas, + List rangesWithHashSchemas, + Schema schema) { + this.rangeSchema = rangeSchema; + this.hashBucketSchemas = hashBucketSchemas; + this.rangesWithHashSchemas = rangesWithHashSchemas; + this.hashSchemasPerRange = new TreeSet<>(COMPARATOR); + this.encodedRangesWithHashSchemas = new ArrayList<>(rangesWithHashSchemas.size()); + + for (RangeWithHashSchema rhs : this.rangesWithHashSchemas) { + final boolean isLowerBoundEmpty = + rhs.lowerBound == null || rhs.lowerBound.getColumnsBitSet().isEmpty(); + byte[] lower = isLowerBoundEmpty ? new byte[0] + : KeyEncoder.encodeRangePartitionKey(rhs.lowerBound, this.rangeSchema); + final boolean isUpperBoundEmpty = + rhs.upperBound == null || rhs.upperBound.getColumnsBitSet().isEmpty(); + byte[] upper = isUpperBoundEmpty ? new byte[0] + : KeyEncoder.encodeRangePartitionKey(rhs.upperBound, this.rangeSchema); + if (!hashSchemasPerRange.add( + new EncodedRangeBoundsWithHashSchema(lower, upper, rhs.hashSchemas))) { + throw new IllegalArgumentException( + rhs.lowerBound.toString() + ": duplicate lower range boundary"); + } + } + + // Populate the convenience collection storing the information on ranges + // with encoded bounds sorted in ascending order by lower bounds. 
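+    // ('hashSchemasPerRange' is a TreeSet ordered by the lower-bound comparator above, so
+    // its iteration order is already ascending.)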
+ encodedRangesWithHashSchemas.addAll(this.hashSchemasPerRange); + + boolean isSimple = + rangesWithHashSchemas.isEmpty() && + hashBucketSchemas.isEmpty() && + rangeSchema.columns.size() == schema.getPrimaryKeyColumnCount(); + if (isSimple) { + int i = 0; + for (Integer id : rangeSchema.columns) { + if (schema.getColumnIndex(id) != i++) { + isSimple = false; + break; + } + } + } + this.isSimple = isSimple; + } + + /** + * Creates a new partition schema from the range and hash bucket schemas. + * + * @param rangeSchema the range schema + * @param hashBucketSchemas the table-wide hash schema + * @param schema the table schema + */ + public PartitionSchema(RangeSchema rangeSchema, + List hashBucketSchemas, + Schema schema) { + this(rangeSchema, hashBucketSchemas, ImmutableList.of(), schema); + } + + /** + * Returns the encoded partition key of the row. + * @return a byte array containing the encoded partition key of the row + */ + public byte[] encodePartitionKey(PartialRow row) { + return KeyEncoder.encodePartitionKey(row, this); + } + + public RangeSchema getRangeSchema() { + return rangeSchema; + } + + public List getHashBucketSchemas() { + return hashBucketSchemas; + } + + public List getRangesWithHashSchemas() { + return rangesWithHashSchemas; + } + + List getEncodedRangesWithHashSchemas() { + return encodedRangesWithHashSchemas; + } + + /** + * Returns true if the partition schema if the partition schema does not include any hash + * components, and the range columns match the table's primary key columns. + * + * @return whether the partition schema is the default simple range partitioning. + */ + boolean isSimpleRangePartitioning() { + return isSimple; + } + + /** + * @return whether the partition schema has ranges with custom hash schemas. + */ + boolean hasCustomHashSchemas() { + return !rangesWithHashSchemas.isEmpty(); + } + + /** + * Find hash schema for the given encoded range key. Depending on the + * partition schema and the key, it might be either table-wide or a custom + * hash schema for a particular range. Just as a convention, this method + * returns the table-wide hash schema for keys in non-covered ranges. + * + * @return hash bucket schema for the encoded range key + */ + List getHashSchemaForRange(byte[] rangeKey) { + if (!hasCustomHashSchemas()) { + // By definition, the table-wide hash schema provides the hash bucketing + // structure in the absence of per-range custom hash schemas. + return hashBucketSchemas; + } + + final EncodedRangeBoundsWithHashSchema entry = hashSchemasPerRange.floor( + new EncodedRangeBoundsWithHashSchema(rangeKey, new byte[0], ImmutableList.of())); + if (entry == null) { + return hashBucketSchemas; + } + // Check if 'rangeKey' is in the range. + // NOTE: the right boundary is exclusive; an empty array for upper boundary + // means that the range partition is unbounded. + final byte[] upper = entry.upper; + Preconditions.checkNotNull(upper); + if (upper.length == 0 || Bytes.memcmp(rangeKey, upper) < 0) { + return entry.hashSchemas; + } + return hashBucketSchemas; + } + + public static class RangeSchema { + private final List columns; + + public RangeSchema(List columns) { + this.columns = columns; + } + + /** + * Gets the column IDs of the columns in the range partition. + * @return the column IDs of the columns in the range partition + * @deprecated Use {@link #getColumnIds} instead. + */ + @Deprecated + public List getColumns() { + return columns; + } + + /** + * Gets the column IDs of the columns in the range partition. 
+ * @return the column IDs of the columns in the range partition + */ + public List getColumnIds() { + return columns; + } + } + + public static class HashBucketSchema { + private final List columnIds; + private int numBuckets; + private int seed; + + public HashBucketSchema(List columnIds, int numBuckets, int seed) { + this.columnIds = columnIds; + this.numBuckets = numBuckets; + this.seed = seed; + } + + /** + * Gets the column IDs of the columns in the hash partition. + * @return the column IDs of the columns in the hash partition + */ + public List getColumnIds() { + return columnIds; + } + + public int getNumBuckets() { + return numBuckets; + } + + public int getSeed() { + return seed; + } + } + + /** + * This utility class is used to represent information on a custom hash schema + * for a particular range. + */ + public static class RangeWithHashSchema { + public PartialRow lowerBound; + public PartialRow upperBound; + public List hashSchemas; + + public RangeWithHashSchema( + PartialRow lowerBound, + PartialRow upperBound, + List hashSchemas) { + Preconditions.checkNotNull(lowerBound); + Preconditions.checkNotNull(upperBound); + Preconditions.checkArgument( + lowerBound.getSchema().equals(upperBound.getSchema())); + this.lowerBound = lowerBound; + this.upperBound = upperBound; + this.hashSchemas = hashSchemas; + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PingRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PingRequest.java new file mode 100644 index 0000000000..6651cc0468 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PingRequest.java @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.master.Master; +import org.apache.kudu.util.Pair; + +/** + * Ping request only used for tests to test connections. 
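+ * A ping can also carry required feature flags (see {@link #addRequiredFeature}) to check
+ * whether the target server supports a particular RPC feature.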
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class PingRequest extends KuduRpc { + + private final String serviceName; + private final List requiredFeatures = new ArrayList<>(); + + static PingRequest makeMasterPingRequest() { + return makeMasterPingRequest(null, null, 0); + } + + static PingRequest makeMasterPingRequest(KuduTable masterTable, Timer timer, long timeoutMillis) { + return new PingRequest(masterTable, MASTER_SERVICE_NAME, timer, timeoutMillis); + } + + static PingRequest makeTabletServerPingRequest() { + return new PingRequest(TABLET_SERVER_SERVICE_NAME, null, 0); + } + + private PingRequest(String serviceName, Timer timer, long timeoutMillis) { + this(null, serviceName, timer, timeoutMillis); + } + + private PingRequest(KuduTable table, String serviceName, Timer timer, long timeoutMillis) { + super(table, timer, timeoutMillis); + this.serviceName = serviceName; + } + + /** + * Add an application-specific feature flag required to service the RPC. + * This can be useful on the Ping request to check if a service supports a feature. + * The server will respond with an RpcRemoteException if a feature is not supported. + */ + void addRequiredFeature(Integer feature) { + requiredFeatures.add(feature); + } + + @Override + Collection getRequiredFeatures() { + return requiredFeatures; + } + + @Override + Message createRequestPB() { + return Master.PingRequestPB.getDefaultInstance(); + } + + @Override + String serviceName() { + return serviceName; + } + + @Override + String method() { + return "Ping"; + } + + @Override + Pair deserialize(CallResponse callResponse, String tsUUID) + throws KuduException { + final Master.PingResponsePB.Builder respBuilder = + Master.PingResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + PingResponse response = new PingResponse(timeoutTracker.getElapsedMillis(), tsUUID); + return new Pair<>(response, null); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PingResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PingResponse.java new file mode 100644 index 0000000000..e4f5195e50 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PingResponse.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +class PingResponse extends KuduRpcResponse { + + /** + * Constructor with information common to all RPCs. 
+ * + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param tsUUID a string that contains the UUID of the server that answered the RPC + */ + PingResponse(long elapsedMillis, String tsUUID) { + super(elapsedMillis, tsUUID); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PleaseThrottleException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PleaseThrottleException.java new file mode 100644 index 0000000000..52d15ae26c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/PleaseThrottleException.java @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.kudu.client; + +import com.stumbleupon.async.Deferred; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * This exception notifies the application to throttle its use of Kudu. + *

+ * Since all APIs of {@link AsyncKuduSession} are asynchronous and non-blocking, + * it's possible that the application would produce RPCs at a rate higher + * than Kudu is able to handle. When this happens, {@link AsyncKuduSession} + * will typically do some buffering up to a certain point beyond which RPCs + * will fail-fast with this exception, to prevent the application from + * running itself out of memory. + *

+ * This exception is expected to be handled by having the application + * throttle or pause itself for a short period of time before retrying the + * RPC that failed with this exception as well as before sending other RPCs. + * The reason this exception inherits from {@link NonRecoverableException} + * instead of {@link RecoverableException} is that the usual course of action + * when handling a {@link RecoverableException} is to retry right away, which + * would defeat the whole purpose of this exception. Here, we want the + * application to retry after a reasonable delay as well as throttle + * the pace of creation of new RPCs. What constitutes a "reasonable + * delay" depends on the nature of RPCs and rate at which they're produced. + *

+ * One effective strategy to handle this exception is to set a flag to true + * when this exception is first emitted that causes the application to pause + * or throttle its use of Kudu. Then you can retry the RPC that failed + * (which is accessible through {@link #getFailedRpc}) and add a callback to + * it in order to unset the flag once the RPC completes successfully. + * Note that low-throughput applications will typically rarely (if ever) + * hit this exception, so they don't need complex throttling logic. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +@SuppressWarnings("serial") +public final class PleaseThrottleException extends RecoverableException + implements HasFailedRpcException { + + /** The RPC that was failed with this exception. */ + private final transient Operation rpc; + + /** A deferred one can wait on before retrying the failed RPC. */ + private final transient Deferred deferred; + + /** + * Constructor. + * @param status status object containing the reason for the exception + * @param cause The exception that requires the application to throttle + * itself (can be {@code null}) + * @param rpc The RPC that was made to fail with this exception + * @param deferred A deferred one can wait on before retrying the failed RPC + */ + PleaseThrottleException(Status status, + KuduException cause, + Operation rpc, + Deferred deferred) { + super(status, cause); + this.rpc = rpc; + this.deferred = deferred; + } + + /** + * The RPC that was made to fail with this exception. + */ + @Override + public Operation getFailedRpc() { + return rpc; + } + + /** + * Returns a deferred one can wait on before retrying the failed RPC. + * @since 1.3 + */ + public Deferred getDeferred() { + return deferred; + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ProtobufHelper.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ProtobufHelper.java new file mode 100644 index 0000000000..a9c015bb05 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ProtobufHelper.java @@ -0,0 +1,546 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.sql.Date; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.protobuf.ByteString; +import com.google.protobuf.UnsafeByteOperations; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Common; +import org.apache.kudu.RowOperations; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.DecimalUtil; + +@InterfaceAudience.Private +public class ProtobufHelper { + + /** + * The flags that are not included while serializing. + */ + public enum SchemaPBConversionFlags { + SCHEMA_PB_WITHOUT_COMMENT, + SCHEMA_PB_WITHOUT_ID + } + + /** + * Utility method to convert a Schema to its wire format. + * @param schema Schema to convert + * @return a list of ColumnSchemaPB + */ + public static List schemaToListPb(Schema schema) { + return schemaToListPb(schema, EnumSet.noneOf(SchemaPBConversionFlags.class)); + } + + public static List schemaToListPb(Schema schema, + EnumSet flags) { + ArrayList columns = new ArrayList<>(schema.getColumnCount()); + Common.ColumnSchemaPB.Builder schemaBuilder = Common.ColumnSchemaPB.newBuilder(); + for (ColumnSchema col : schema.getColumns()) { + int id = schema.hasColumnIds() ? schema.getColumnId(col.getName()) : -1; + columns.add(columnToPb(schemaBuilder, id, col, flags)); + schemaBuilder.clear(); + } + return columns; + } + + public static Common.SchemaPB schemaToPb(Schema schema) { + return schemaToPb(schema, EnumSet.noneOf(SchemaPBConversionFlags.class)); + } + + public static Common.SchemaPB schemaToPb(Schema schema, + EnumSet flags) { + Common.SchemaPB.Builder builder = Common.SchemaPB.newBuilder(); + builder.addAllColumns(schemaToListPb(schema, flags)); + return builder.build(); + } + + public static Common.ColumnSchemaPB columnToPb(ColumnSchema column) { + return columnToPb(Common.ColumnSchemaPB.newBuilder(), -1, column); + } + + public static Common.ColumnSchemaPB columnToPb(Common.ColumnSchemaPB.Builder schemaBuilder, + int colId, + ColumnSchema column) { + return columnToPb(schemaBuilder, + colId, + column, + EnumSet.noneOf(SchemaPBConversionFlags.class)); + } + + public static Common.ColumnSchemaPB columnToPb(Common.ColumnSchemaPB.Builder schemaBuilder, + int colId, + ColumnSchema column, + EnumSet flags) { + schemaBuilder + .setName(column.getName()) + .setType(column.getWireType()) + .setIsKey(column.isKey()) + .setIsNullable(column.isNullable()) + .setImmutable(column.isImmutable()) + .setIsAutoIncrementing(column.isAutoIncrementing()) + .setCfileBlockSize(column.getDesiredBlockSize()); + + if (!flags.contains(SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_ID) && colId >= 0) { + schemaBuilder.setId(colId); + } + if (column.getEncoding() != null) { + schemaBuilder.setEncoding(column.getEncoding().getInternalPbType()); + } + if (column.getCompressionAlgorithm() != null) { + schemaBuilder.setCompression(column.getCompressionAlgorithm().getInternalPbType()); + } + if (column.getDefaultValue() != null) { + schemaBuilder.setReadDefaultValue(UnsafeByteOperations.unsafeWrap( + objectToWireFormat(column, column.getDefaultValue()))); + } + if 
(column.getTypeAttributes() != null) { + schemaBuilder.setTypeAttributes( + columnTypeAttributesToPb(Common.ColumnTypeAttributesPB.newBuilder(), column)); + } + if (!flags.contains(SchemaPBConversionFlags.SCHEMA_PB_WITHOUT_COMMENT) && + !column.getComment().isEmpty()) { + schemaBuilder.setComment(column.getComment()); + } + return schemaBuilder.build(); + } + + public static Common.ColumnTypeAttributesPB columnTypeAttributesToPb( + Common.ColumnTypeAttributesPB.Builder builder, ColumnSchema column) { + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + if (typeAttributes.hasPrecision()) { + builder.setPrecision(typeAttributes.getPrecision()); + } + if (typeAttributes.hasScale()) { + builder.setScale(typeAttributes.getScale()); + } + if (typeAttributes.hasLength()) { + builder.setLength(typeAttributes.getLength()); + } + return builder.build(); + } + + public static ColumnSchema pbToColumnSchema(Common.ColumnSchemaPB pb) { + return pbToColumnSchema(pb, true); + } + + public static ColumnSchema pbToColumnSchema(Common.ColumnSchemaPB pb, + boolean isKeyUnique) { + ColumnSchema.Encoding encoding = ColumnSchema.Encoding.valueOf(pb.getEncoding().name()); + ColumnSchema.CompressionAlgorithm compressionAlgorithm = + ColumnSchema.CompressionAlgorithm.valueOf(pb.getCompression().name()); + int desiredBlockSize = pb.getCfileBlockSize(); + + if (pb.getIsAutoIncrementing()) { + // Set encoding, compression algorithm, block size and comment from 'pb' parameter + return new ColumnSchema.AutoIncrementingColumnSchemaBuilder() + .encoding(encoding) + .compressionAlgorithm(compressionAlgorithm) + .desiredBlockSize(desiredBlockSize) + .comment(pb.getComment()) + .build(); + } + + Type type = Type.getTypeForDataType(pb.getType()); + ColumnTypeAttributes typeAttributes = pb.hasTypeAttributes() ? + pbToColumnTypeAttributes(pb.getTypeAttributes()) : null; + Object defaultValue = pb.hasWriteDefaultValue() ? + byteStringToObject(type, typeAttributes, pb.getWriteDefaultValue()) : null; + ColumnSchema.ColumnSchemaBuilder csb = + new ColumnSchema.ColumnSchemaBuilder(pb.getName(), type); + if (pb.getIsKey() && isKeyUnique) { + csb.key(true); + } else { + csb.nonUniqueKey(pb.getIsKey()); + } + return csb.nullable(pb.getIsNullable()) + .immutable(pb.getImmutable()) + .defaultValue(defaultValue) + .encoding(encoding) + .compressionAlgorithm(compressionAlgorithm) + .desiredBlockSize(desiredBlockSize) + .typeAttributes(typeAttributes) + .comment(pb.getComment()) + .build(); + } + + public static ColumnTypeAttributes pbToColumnTypeAttributes(Common.ColumnTypeAttributesPB pb) { + ColumnTypeAttributes.ColumnTypeAttributesBuilder builder = + new ColumnTypeAttributes.ColumnTypeAttributesBuilder(); + if (pb.hasPrecision()) { + builder.precision(pb.getPrecision()); + } + if (pb.hasScale()) { + builder.scale(pb.getScale()); + } + if (pb.hasLength()) { + builder.length(pb.getLength()); + } + return builder.build(); + } + + public static Schema pbToSchema(Common.SchemaPB schema) { + // Since ColumnSchema.keyUnique in run-time structures is not persistent in Kudu + // server, we need to find if the table has auto-incrementing column first, and set + // all key columns as non unique key columns if the table has auto-incrementing + // column. 
+ boolean hasAutoIncrementing = false; + for (Common.ColumnSchemaPB columnPb : schema.getColumnsList()) { + if (columnPb.getIsAutoIncrementing()) { + hasAutoIncrementing = true; + break; + } + } + List columns = new ArrayList<>(schema.getColumnsCount()); + List columnIds = new ArrayList<>(schema.getColumnsCount()); + for (Common.ColumnSchemaPB columnPb : schema.getColumnsList()) { + // Key is not unique if hasAutoIncrementing is true. + columns.add(pbToColumnSchema(columnPb, !hasAutoIncrementing)); + int id = columnPb.getId(); + if (id < 0) { + throw new IllegalArgumentException("Illegal column ID: " + id); + } + columnIds.add(id); + } + return new Schema(columns, columnIds); + } + + /** + * Factory method for creating a {@code PartitionSchema} from a protobuf message. + * + * @param pb the partition schema protobuf message + * @return a partition instance + */ + public static PartitionSchema pbToPartitionSchema(Common.PartitionSchemaPB pb, Schema schema) { + List rangeColumns = pbToIds(pb.getRangeSchema().getColumnsList()); + PartitionSchema.RangeSchema rangeSchema = new PartitionSchema.RangeSchema(rangeColumns); + + ImmutableList.Builder hashSchemas = ImmutableList.builder(); + + for (Common.PartitionSchemaPB.HashBucketSchemaPB hashBucketSchemaPB + : pb.getHashSchemaList()) { + List hashColumnIds = pbToIds(hashBucketSchemaPB.getColumnsList()); + + PartitionSchema.HashBucketSchema hashSchema = + new PartitionSchema.HashBucketSchema(hashColumnIds, + hashBucketSchemaPB.getNumBuckets(), + hashBucketSchemaPB.getSeed()); + + hashSchemas.add(hashSchema); + } + + // Populate the list of ranges with custom hash schemas. + ImmutableList.Builder rangesWithHashSchemas = + ImmutableList.builder(); + + for (Common.PartitionSchemaPB.RangeWithHashSchemaPB rhsPB : + pb.getCustomHashSchemaRangesList()) { + List rangeHashSchemas = new ArrayList<>(); + for (Common.PartitionSchemaPB.HashBucketSchemaPB hbs : rhsPB.getHashSchemaList()) { + rangeHashSchemas.add(new PartitionSchema.HashBucketSchema( + pbToIds(hbs.getColumnsList()), hbs.getNumBuckets(), hbs.getSeed())); + } + + // Decode RowOperationsPB into the range bounds. 
+ final RowOperations.RowOperationsPB rangeBounds = rhsPB.getRangeBounds(); + Operation.OperationsDecoder dec = new Operation.OperationsDecoder(); + final List partitions = dec.decodeRangePartitions(rangeBounds, schema); + if (partitions.size() != 1) { + throw new IllegalArgumentException("unexpected range bounds"); + } + final RangePartition p = partitions.get(0); + + PartitionSchema.RangeWithHashSchema rhs = + new PartitionSchema.RangeWithHashSchema( + p.lowerBound, p.upperBound, rangeHashSchemas); + rangesWithHashSchemas.add(rhs); + } + + return new PartitionSchema( + rangeSchema, hashSchemas.build(), rangesWithHashSchemas.build(), schema); + } + + public static Common.PartitionSchemaPB partitionSchemaToPb(PartitionSchema partitionSchema) { + Common.PartitionSchemaPB.Builder builder = Common.PartitionSchemaPB.newBuilder(); + + for (PartitionSchema.HashBucketSchema hashBucketSchema : + partitionSchema.getHashBucketSchemas()) { + Common.PartitionSchemaPB.HashBucketSchemaPB.Builder hbsBuilder = + Common.PartitionSchemaPB.HashBucketSchemaPB.newBuilder() + .addAllColumns(idsToPb(hashBucketSchema.getColumnIds())) + .setNumBuckets(hashBucketSchema.getNumBuckets()) + .setSeed(hashBucketSchema.getSeed()); + builder.addHashSchema(hbsBuilder.build()); + } + + Common.PartitionSchemaPB.RangeSchemaPB rangeSchemaPB = + Common.PartitionSchemaPB.RangeSchemaPB.newBuilder() + .addAllColumns(idsToPb(partitionSchema.getRangeSchema().getColumnIds())) + .build(); + builder.setRangeSchema(rangeSchemaPB); + + // Based on the list of ranges with custom hash schemas, populate the + // PartitionSchemaPB.custom_hash_schema_ranges field. + for (PartitionSchema.RangeWithHashSchema rhs : partitionSchema.getRangesWithHashSchemas()) { + Common.PartitionSchemaPB.RangeWithHashSchemaPB.Builder rhsBuilder = + Common.PartitionSchemaPB.RangeWithHashSchemaPB.newBuilder(); + for (PartitionSchema.HashBucketSchema hbs : rhs.hashSchemas) { + Common.PartitionSchemaPB.HashBucketSchemaPB.Builder hbsBuilder = + Common.PartitionSchemaPB.HashBucketSchemaPB.newBuilder() + .addAllColumns(idsToPb(hbs.getColumnIds())) + .setNumBuckets(hbs.getNumBuckets()) + .setSeed(hbs.getSeed()); + rhsBuilder.addHashSchema(hbsBuilder.build()); + } + + rhsBuilder.setRangeBounds(new Operation.OperationsEncoder().encodeLowerAndUpperBounds( + rhs.lowerBound, + rhs.upperBound, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND)); + builder.addCustomHashSchemaRanges(rhsBuilder.build()); + } + + return builder.build(); + } + + /** + * Constructs a new {@code Partition} instance from the a protobuf message. + * @param pb the protobuf message + * @return the {@code Partition} corresponding to the message + */ + static Partition pbToPartition(Common.PartitionPB pb) { + return new Partition(pb.getPartitionKeyStart().toByteArray(), + pb.getPartitionKeyEnd().toByteArray(), + pb.getHashBucketsList()); + } + + static Common.PartitionPB partitionToPb(Partition partition) { + return Common.PartitionPB.newBuilder() + .setPartitionKeyStart(ByteString.copyFrom(partition.getPartitionKeyStart())) + .setPartitionKeyEnd(ByteString.copyFrom(partition.getPartitionKeyEnd())) + .addAllHashBuckets(partition.getHashBuckets()) + .build(); + } + + /** + * Deserializes a list of column identifier protobufs into a list of column IDs. 
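// ---------------------------------------------------------------------------
// Illustrative sketch (editor's example, not part of this patch): serializing an
// existing table's partition schema with partitionSchemaToPb(). The master
// address and table name are hypothetical; checked-exception handling is elided.
KuduClient client = new KuduClient.KuduClientBuilder("master-1:7051").build();
try {
  KuduTable table = client.openTable("metrics");
  Common.PartitionSchemaPB partitionPb =
      ProtobufHelper.partitionSchemaToPb(table.getPartitionSchema());
} finally {
  client.shutdown();
}
// ---------------------------------------------------------------------------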
This method + * relies on the fact that the master will aways send a partition schema with column IDs, and not + * column names (column names are only used when the client is sending the partition schema to + * the master as part of the create table process). + * + * @param columnIdentifiers the column identifiers + * @return the column IDs + */ + private static List pbToIds( + List columnIdentifiers) { + ImmutableList.Builder columnIds = ImmutableList.builder(); + for (Common.PartitionSchemaPB.ColumnIdentifierPB column : columnIdentifiers) { + switch (column.getIdentifierCase()) { + case ID: + columnIds.add(column.getId()); + break; + case NAME: + throw new IllegalArgumentException( + String.format("Expected column ID from master: %s", column)); + case IDENTIFIER_NOT_SET: + throw new IllegalArgumentException("Unknown column: " + column); + default: + throw new IllegalArgumentException("Unknown identifier type!"); + } + } + return columnIds.build(); + } + + /** + * Serializes a list of column IDs into a list of column identifier protobufs. + * + * @param columnIds the column IDs + * @return the column identifiers + */ + private static List idsToPb( + List columnIds) { + ImmutableList.Builder columnIdentifiers = + ImmutableList.builder(); + for (Integer id : columnIds) { + Common.PartitionSchemaPB.ColumnIdentifierPB columnIdentifierPB = + Common.PartitionSchemaPB.ColumnIdentifierPB.newBuilder().setId(id).build(); + columnIdentifiers.add(columnIdentifierPB); + } + return columnIdentifiers.build(); + } + + private static byte[] objectToWireFormat(ColumnSchema col, Object value) { + switch (col.getType()) { + case BOOL: + return Bytes.fromBoolean((Boolean) value); + case INT8: + return new byte[] {(Byte) value}; + case INT16: + return Bytes.fromShort((Short) value); + case DATE: + return Bytes.fromInt(DateUtil.sqlDateToEpochDays((Date) value)); + case INT32: + return Bytes.fromInt((Integer) value); + case INT64: + case UNIXTIME_MICROS: + return Bytes.fromLong((Long) value); + case VARCHAR: + case STRING: + return ((String) value).getBytes(UTF_8); + case BINARY: + return (byte[]) value; + case FLOAT: + return Bytes.fromFloat((Float) value); + case DOUBLE: + return Bytes.fromDouble((Double) value); + case DECIMAL: + return Bytes.fromBigDecimal((BigDecimal) value, col.getTypeAttributes().getPrecision()); + default: + throw new IllegalArgumentException("The column " + col.getName() + " is of type " + col + .getType() + " which is unknown"); + } + } + + private static Object byteStringToObject(Type type, ColumnTypeAttributes typeAttributes, + ByteString value) { + ByteBuffer buf = value.asReadOnlyByteBuffer(); + buf.order(ByteOrder.LITTLE_ENDIAN); + switch (type) { + case BOOL: + return buf.get() != 0; + case INT8: + return buf.get(); + case INT16: + return buf.getShort(); + case DATE: + return DateUtil.epochDaysToSqlDate(buf.getInt()); + case INT32: + return buf.getInt(); + case INT64: + case UNIXTIME_MICROS: + return buf.getLong(); + case FLOAT: + return buf.getFloat(); + case DOUBLE: + return buf.getDouble(); + case VARCHAR: + case STRING: + return value.toStringUtf8(); + case BINARY: + return value.toByteArray(); + case DECIMAL: + return Bytes.getDecimal(value.toByteArray(), + typeAttributes.getPrecision(), typeAttributes.getScale()); + default: + throw new IllegalArgumentException("This type is unknown: " + type); + } + } + + /** + * Serializes an object based on its Java type. Used for Alter Column + * operations where the column's type is not available. 
`value` must be + * a Kudu-compatible type or else throws {@link IllegalArgumentException}. + * + * @param colName the name of the column (for the error message) + * @param value the value to serialize + * @return the serialized object + */ + protected static ByteString objectToByteStringNoType(String colName, Object value) { + byte[] bytes; + if (value instanceof Boolean) { + bytes = Bytes.fromBoolean((Boolean) value); + } else if (value instanceof Byte) { + bytes = new byte[] {(Byte) value}; + } else if (value instanceof Short) { + bytes = Bytes.fromShort((Short) value); + } else if (value instanceof Integer) { + bytes = Bytes.fromInt((Integer) value); + } else if (value instanceof Long) { + bytes = Bytes.fromLong((Long) value); + } else if (value instanceof String) { + bytes = ((String) value).getBytes(UTF_8); + } else if (value instanceof byte[]) { + bytes = (byte[]) value; + } else if (value instanceof ByteBuffer) { + bytes = ((ByteBuffer) value).array(); + } else if (value instanceof Float) { + bytes = Bytes.fromFloat((Float) value); + } else if (value instanceof Double) { + bytes = Bytes.fromDouble((Double) value); + } else if (value instanceof BigDecimal) { + bytes = Bytes.fromBigDecimal((BigDecimal) value, DecimalUtil.MAX_DECIMAL_PRECISION); + } else { + throw new IllegalArgumentException("The default value provided for " + + "column " + colName + " is of class " + value.getClass().getName() + + " which does not map to a supported Kudu type"); + } + return UnsafeByteOperations.unsafeWrap(bytes); + } + + /** + * Convert a {@link HostAndPort} to + * {@link org.apache.kudu.Common.HostPortPB} + * protobuf message for serialization. + * @param hostAndPort The host and port object. Both host and port must be specified. + * @return An initialized HostPortPB object. + */ + public static Common.HostPortPB hostAndPortToPB(HostAndPort hostAndPort) { + return Common.HostPortPB.newBuilder() + .setHost(hostAndPort.getHost()) + .setPort(hostAndPort.getPort()) + .build(); + } + + /** + * Convert a {@link org.apache.kudu.Common.HostPortPB} to + * {@link HostAndPort}. + * @param hostPortPB The fully initialized HostPortPB object. + * Must have both host and port specified. + * @return An initialized initialized HostAndPort object. + */ + public static HostAndPort hostAndPortFromPB(Common.HostPortPB hostPortPB) { + return new HostAndPort(hostPortPB.getHost(), hostPortPB.getPort()); + } + + /** + * Convert a list of HostPortPBs into a comma-separated string. + */ + public static String hostPortPbListToString(List pbs) { + List strs = new ArrayList<>(pbs.size()); + for (Common.HostPortPB pb : pbs) { + strs.add(pb.getHost() + ":" + pb.getPort()); + } + return Joiner.on(',').join(strs); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartition.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartition.java new file mode 100644 index 0000000000..ff633f6ce4 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartition.java @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
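// ---------------------------------------------------------------------------
// Illustrative sketch (editor's example, not part of this patch): converting a
// HostAndPort to its protobuf form and rendering a list of HostPortPBs as a
// comma-separated string with the helpers above. The address is hypothetical.
HostAndPort master = new HostAndPort("master-1.example.com", 7051);
Common.HostPortPB masterPb = ProtobufHelper.hostAndPortToPB(master);
// Produces "master-1.example.com:7051".
String rendered = ProtobufHelper.hostPortPbListToString(ImmutableList.of(masterPb));
// ---------------------------------------------------------------------------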
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * This class represents a range partition schema with table-wide hash schema. + * + * See also RangePartitionWithCustomHashSchema. + */ +@InterfaceAudience.LimitedPrivate({"kudu-backup", "Test"}) +@InterfaceStability.Evolving +public class RangePartition { + final PartialRow lowerBound; + final PartialRow upperBound; + final RangePartitionBound lowerBoundType; + final RangePartitionBound upperBoundType; + + public RangePartition(PartialRow lowerBound, + PartialRow upperBound, + RangePartitionBound lowerBoundType, + RangePartitionBound upperBoundType) { + Preconditions.checkNotNull(lowerBound); + Preconditions.checkNotNull(upperBound); + Preconditions.checkArgument( + lowerBound.getSchema().equals(upperBound.getSchema())); + this.lowerBound = lowerBound; + this.upperBound = upperBound; + this.lowerBoundType = lowerBoundType; + this.upperBoundType = upperBoundType; + } + + public PartialRow getLowerBound() { + return lowerBound; + } + + public RangePartitionBound getLowerBoundType() { + return lowerBoundType; + } + + public PartialRow getUpperBound() { + return upperBound; + } + + public RangePartitionBound getUpperBoundType() { + return upperBoundType; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartitionBound.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartitionBound.java new file mode 100644 index 0000000000..aebc7780a8 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartitionBound.java @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** Specifies whether a range partition bound is inclusive or exclusive. */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum RangePartitionBound { + /** An exclusive range partition bound. */ + EXCLUSIVE_BOUND, + /** An inclusive range partition bound. 
*/ + INCLUSIVE_BOUND, +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartitionWithCustomHashSchema.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartitionWithCustomHashSchema.java new file mode 100644 index 0000000000..df3c0c6761 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RangePartitionWithCustomHashSchema.java @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.List; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Common; + +/** + * This class represents a range partition with custom hash bucketing schema. + * + * See also RangePartition. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class RangePartitionWithCustomHashSchema extends RangePartition { + // Using the corresponding PB type to represent this range with its custom + // hash schema. + private Common.PartitionSchemaPB.RangeWithHashSchemaPB.Builder pb = + Common.PartitionSchemaPB.RangeWithHashSchemaPB.newBuilder(); + + /** + * @param lowerBound upper bound of the range partition + * @param upperBound lower bound of the range partition + * @param lowerBoundType lower bound type: inclusive/exclusive + * @param upperBoundType upper bound type: inclusive/exclusive + * @return new RangePartitionWithCustomHashSchema object + */ + public RangePartitionWithCustomHashSchema( + PartialRow lowerBound, + PartialRow upperBound, + RangePartitionBound lowerBoundType, + RangePartitionBound upperBoundType) { + super(lowerBound, upperBound, lowerBoundType, upperBoundType); + pb.setRangeBounds( + new Operation.OperationsEncoder().encodeLowerAndUpperBounds( + lowerBound, upperBound, lowerBoundType, upperBoundType)); + } + + /** + * Add a level of hash sub-partitioning for this range partition. + * + * The hash schema for the range partition is defined by the whole set of + * its hash sub-partitioning levels. A range partition can have zero or + * multiple levels of hash sub-partitioning: this method can be called + * many times on the same object to define a multi-dimensional hash + * bucketing structure for the range. 
+ * + * @param columns name of table's columns to use for hash bucketing + * @param numBuckets number of buckets used by the hash function + * @param seed the seed for the hash function + * @return this RangePartition object modified accordingly + */ + public RangePartition addHashPartitions( + List columns, int numBuckets, int seed) { + Common.PartitionSchemaPB.HashBucketSchemaPB.Builder b = + pb.addHashSchemaBuilder(); + for (String column : columns) { + b.addColumnsBuilder().setName(column); + } + b.setNumBuckets(numBuckets); + b.setSeed(seed); + return this; + } + + public Common.PartitionSchemaPB.RangeWithHashSchemaPB toPB() { + return pb.build(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecallDeletedTableRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecallDeletedTableRequest.java new file mode 100644 index 0000000000..61f00f4039 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecallDeletedTableRequest.java @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
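// ---------------------------------------------------------------------------
// Illustrative sketch (editor's example, not part of this patch): defining a
// range partition [0, 100) that carries its own hash bucketing instead of the
// table-wide hash schema. Schema and column names are hypothetical; assumes the
// usual client imports (java.util.*, Guava ImmutableList, org.apache.kudu.*).
List<ColumnSchema> cols = new ArrayList<>();
cols.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build());
cols.add(new ColumnSchema.ColumnSchemaBuilder("val", Type.STRING).build());
Schema schema = new Schema(cols);
PartialRow lower = schema.newPartialRow();
lower.addInt("key", 0);
PartialRow upper = schema.newPartialRow();
upper.addInt("key", 100);
RangePartitionWithCustomHashSchema range = new RangePartitionWithCustomHashSchema(
    lower, upper, RangePartitionBound.INCLUSIVE_BOUND, RangePartitionBound.EXCLUSIVE_BOUND);
// This range alone gets 4 hash buckets on "key" (seed 0); such a range is
// typically attached to the table's create/alter options.
range.addHashPartitions(ImmutableList.of("key"), 4, 0);
Common.PartitionSchemaPB.RangeWithHashSchemaPB rangePb = range.toPB();
// ---------------------------------------------------------------------------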
+ +package org.apache.kudu.client; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Message; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.master.Master; +import org.apache.kudu.util.Pair; + +/** + * RPC to recall tables + */ +@InterfaceAudience.Private +class RecallDeletedTableRequest extends KuduRpc { + + static final String RECALL_DELETED_TABLE = "RecallDeletedTable"; + + private final String newTableName; + + private final String id; + + RecallDeletedTableRequest(KuduTable table, + String id, + String newTableName, + Timer timer, + long timeoutMillis) { + super(table, timer, timeoutMillis); + this.id = id; + this.newTableName = newTableName; + } + + @Override + Message createRequestPB() { + final Master.RecallDeletedTableRequestPB.Builder builder = + Master.RecallDeletedTableRequestPB.newBuilder(); + builder.setTable(Master.TableIdentifierPB.newBuilder() + .setTableId(ByteString.copyFromUtf8(id))); + if (!newTableName.isEmpty()) { + builder.setNewTableName(newTableName); + } + return builder.build(); + } + + @Override + String serviceName() { + return MASTER_SERVICE_NAME; + } + + @Override + String method() { + return RECALL_DELETED_TABLE; + } + + @Override + Pair deserialize(CallResponse callResponse, + String tsUUID) throws KuduException { + final Master.RecallDeletedTableResponsePB.Builder builder = + Master.RecallDeletedTableResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), builder); + RecallDeletedTableResponse response = + new RecallDeletedTableResponse(timeoutTracker.getElapsedMillis(), tsUUID); + return new Pair( + response, builder.hasError() ? builder.getError() : null); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecallDeletedTableResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecallDeletedTableResponse.java new file mode 100644 index 0000000000..c30fa051e1 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecallDeletedTableResponse.java @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class RecallDeletedTableResponse extends KuduRpcResponse { + + /** + * @param elapsedMillis Time in milliseconds since RPC creation to now. 
+ */ + RecallDeletedTableResponse(long elapsedMillis, String tsUUID) { + super(elapsedMillis, tsUUID); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecoverableException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecoverableException.java new file mode 100644 index 0000000000..700e83288c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RecoverableException.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * An exception that's possible to retry. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +@SuppressWarnings("serial") +class RecoverableException extends KuduException { + + /** + * Constructor. + * @param status status object containing the reason for the exception + * trace. + */ + RecoverableException(Status status) { + super(status); + } + + /** + * Constructor. + * @param status status object containing the reason for the exception + * @param cause The exception that caused this one to be thrown. + */ + RecoverableException(Status status, Throwable cause) { + super(status, cause); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java new file mode 100644 index 0000000000..2c035df741 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java @@ -0,0 +1,345 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.Nullable; +import javax.annotation.concurrent.GuardedBy; + +import com.google.common.base.Joiner; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.common.collect.ComparisonChain; +import com.google.common.collect.ImmutableList; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.consensus.Metadata; + +/** + * This class encapsulates the information regarding a tablet and its locations. + *
<p>
+ * RemoteTablet's main function is to keep track of where the leader for this + * tablet is. For example, an RPC might call {@link #getLeaderServerInfo()}, contact that TS, find + * it's not the leader anymore, and then call {@link #demoteLeader(String)}. + *
<p>
+ * A RemoteTablet's life is expected to be long in a cluster where roles aren't changing often, + * and short when they do since the Kudu client will replace the RemoteTablet it caches with new + * ones after getting tablet locations from the master. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class RemoteTablet implements Comparable { + + private static final Logger LOG = LoggerFactory.getLogger(RemoteTablet.class); + + // This random integer is used when making any random choice for replica + // selection. It is static to provide a deterministic selection for any given + // process and therefore also better cache affinity while ensuring that we can + // still benefit from spreading the load across replicas for other processes + // and applications. + private static final int RANDOM_SELECTION_INT = new Random().nextInt(Integer.MAX_VALUE); + + private final String tableId; + private final String tabletId; + @GuardedBy("tabletServers") + private final Map tabletServers; + private final AtomicReference> replicas = + new AtomicReference<>(ImmutableList.of()); + private final Partition partition; + + @GuardedBy("tabletServers") + private String leaderUuid; + + RemoteTablet(String tableId, + String tabletId, + Partition partition, + List replicas, + List serverInfos) { + Preconditions.checkArgument(replicas.size() == serverInfos.size(), + "the number of replicas does not equal the number of servers"); + this.tabletId = tabletId; + this.tableId = tableId; + this.partition = partition; + this.tabletServers = new HashMap<>(serverInfos.size()); + + for (ServerInfo serverInfo : serverInfos) { + this.tabletServers.put(serverInfo.getUuid(), serverInfo); + } + + ImmutableList.Builder replicasBuilder = new ImmutableList.Builder<>(); + for (int i = 0; i < replicas.size(); ++i) { + replicasBuilder.add(replicas.get(i)); + if (replicas.get(i).getRoleAsEnum().equals(Metadata.RaftPeerPB.Role.LEADER)) { + this.leaderUuid = serverInfos.get(i).getUuid(); + } + } + + if (leaderUuid == null) { + LOG.warn("No leader provided for tablet {}", getTabletId()); + } + this.replicas.set(replicasBuilder.build()); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(tabletId).append("@["); + List tsStrings; + synchronized (tabletServers) { + tsStrings = new ArrayList<>(tabletServers.size()); + for (ServerInfo e : tabletServers.values()) { + String flag = e.getUuid().equals(leaderUuid) ? "[L]" : ""; + tsStrings.add(e.toString() + flag); + } + } + // Sort so that we have a consistent iteration order regardless of + // HashSet ordering. + Collections.sort(tsStrings); + sb.append(Joiner.on(',').join(tsStrings)); + sb.append(']'); + return sb.toString(); + } + + /** + * Removes the passed tablet server from this tablet's list of tablet servers. + * @param uuid a tablet server to remove from this cache + * @return true if this method removed ts from the list, else false + */ + boolean removeTabletClient(String uuid) { + synchronized (tabletServers) { + if (leaderUuid != null && leaderUuid.equals(uuid)) { + leaderUuid = null; + } + // TODO(ghenke): Should this also remove the related replica? + // As it stands there can be a replica with a missing tablet server. + if (tabletServers.remove(uuid) != null) { + return true; + } + LOG.debug("tablet {} already removed ts {}, size left is {}", + getTabletId(), uuid, tabletServers.size()); + return false; + } + } + + /** + * Clears the leader UUID if the passed tablet server is the current leader. 
+ * If it is the current leader, then the next call to this tablet will have + * to query the master to find the new leader. + * @param uuid a tablet server that gave a sign that it isn't this tablet's leader + */ + void demoteLeader(String uuid) { + synchronized (tabletServers) { + if (leaderUuid == null) { + LOG.debug("{} couldn't be demoted as the leader for {}, there is no known leader", + uuid, getTabletId()); + return; + } + + if (leaderUuid.equals(uuid)) { + leaderUuid = null; + LOG.debug("{} was demoted as the leader for {}", uuid, getTabletId()); + } else { + LOG.debug("{} wasn't the leader for {}, current leader is {}", uuid, + getTabletId(), leaderUuid); + } + } + } + + /** + * Get the information on the tablet server that we think holds the leader replica for this + * tablet. + * + * @return information on a tablet server that we think has the leader, else null + */ + @Nullable + ServerInfo getLeaderServerInfo() { + synchronized (tabletServers) { + return tabletServers.get(leaderUuid); + } + } + + /** + * Get the information on the closest server. Servers are ranked from closest to furthest as + * follows: + * - Local servers + * - Servers in the same location as the client + * - All other servers + * + * @param location the location of the client + * @return the information for a closest server, or null if this cache doesn't know any servers. + */ + @Nullable + ServerInfo getClosestServerInfo(String location) { + // This method returns + // 1. a randomly picked server among local servers, if there is one based + // on IP and assigned location, or + // 2. a randomly picked server in the same assigned location, if there is a + // server in the same location, or, finally, + // 3. a randomly picked server among all tablet servers. + // TODO(wdberkeley): Eventually, the client might use the hierarchical + // structure of a location to determine proximity. + // NOTE: this is the same logic implemented in client-internal.cc. + synchronized (tabletServers) { + if (tabletServers.isEmpty()) { + return null; + } + ServerInfo result = null; + List localServers = new ArrayList<>(); + List serversInSameLocation = new ArrayList<>(); + int randomIndex = RANDOM_SELECTION_INT % tabletServers.size(); + int index = 0; + for (ServerInfo e : tabletServers.values()) { + boolean serverInSameLocation = !location.isEmpty() && e.inSameLocation(location); + + // Only consider a server "local" if we're in the same location, or if + // there is missing location info. + if (location.isEmpty() || e.getLocation().isEmpty() || + serverInSameLocation) { + if (e.isLocal()) { + localServers.add(e); + } + } + if (serverInSameLocation) { + serversInSameLocation.add(e); + } + if (index == randomIndex) { + result = e; + } + index++; + } + if (!localServers.isEmpty()) { + randomIndex = RANDOM_SELECTION_INT % localServers.size(); + return localServers.get(randomIndex); + } + if (!serversInSameLocation.isEmpty()) { + randomIndex = RANDOM_SELECTION_INT % serversInSameLocation.size(); + return serversInSameLocation.get(randomIndex); + } + return result; + } + } + + /** + * Helper function to centralize the calling of methods based on the passed replica selection + * mechanism. 
+ * + * @param replicaSelection replica selection mechanism to use + * @param location the location of the client + * @return information on the server that matches the selection, can be null + */ + @Nullable + ServerInfo getReplicaSelectedServerInfo(ReplicaSelection replicaSelection, String location) { + switch (replicaSelection) { + case LEADER_ONLY: + return getLeaderServerInfo(); + case CLOSEST_REPLICA: + return getClosestServerInfo(location); + default: + throw new RuntimeException("unknown replica selection mechanism " + replicaSelection); + } + } + + /** + * Get replicas of this tablet. The returned list may not be mutated. + * + * This list of replicas may include replicas for servers that have been + * removed via `removeTabletClient`, therefore won't be returned via + * `getTabletServersCopy`. + * + * @return the replicas of the tablet + */ + List getReplicas() { + return replicas.get(); + } + + public String getTableId() { + return tableId; + } + + String getTabletId() { + return tabletId; + } + + public Partition getPartition() { + return partition; + } + + byte[] getTabletIdAsBytes() { + return tabletId.getBytes(UTF_8); + } + + List getTabletServersCopy() { + List results = new ArrayList<>(); + synchronized (tabletServers) { + results.addAll(tabletServers.values()); + } + return results; + } + + /** + * Get information on tablet server by its UUID. + * @param uuid tablet server uuid. + * @return tablet server info by the uuid. + */ + ServerInfo getTabletServerByUuid(String uuid) { + synchronized (tabletServers) { + return tabletServers.get(uuid); + } + } + + @Override + public int compareTo(RemoteTablet remoteTablet) { + if (remoteTablet == null) { + return 1; + } + + return ComparisonChain.start() + .compare(this.tableId, remoteTablet.tableId) + .compare(this.partition, remoteTablet.partition).result(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof RemoteTablet)) { + return false; + } + + RemoteTablet that = (RemoteTablet) o; + + return this.compareTo(that) == 0; + } + + @Override + public int hashCode() { + return Objects.hashCode(tableId, partition); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ReplicaSelection.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ReplicaSelection.java new file mode 100644 index 0000000000..def81d341b --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ReplicaSelection.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
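// ---------------------------------------------------------------------------
// Illustrative sketch (editor's example, not part of this patch): asking a
// scanner to read from the closest replica rather than only the leader. The
// master address and table name are hypothetical, and the example assumes the
// scanner builder's replicaSelection(...) setter from the public client API;
// checked-exception handling is elided.
KuduClient client = new KuduClient.KuduClientBuilder("master-1:7051").build();
KuduTable table = client.openTable("metrics");
KuduScanner scanner = client.newScannerBuilder(table)
    .replicaSelection(ReplicaSelection.CLOSEST_REPLICA)
    .build();
// ---------------------------------------------------------------------------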
+ +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Policy with which to choose amongst multiple replicas. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum ReplicaSelection { + /** + * Select the LEADER replica. + */ + LEADER_ONLY, + /** + * Select the closest replica to the client. Replicas are classified from closest to furthest as + * follows: + * - Local replicas + * - Replicas whose tablet server has the same location as the client + * - All other replicas + */ + CLOSEST_REPLICA +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RequestTracker.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RequestTracker.java new file mode 100644 index 0000000000..a66e2a7f82 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RequestTracker.java @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.TreeSet; +import javax.annotation.concurrent.GuardedBy; + +import org.apache.yetus.audience.InterfaceAudience; + +/** + * This is the same class as src/kudu/rpc/request_tracker.h. + */ +@InterfaceAudience.Private +public class RequestTracker { + private final Object lock = new Object(); + + @GuardedBy("lock") + private long nextSeqNo = 1; + @GuardedBy("lock") + private final TreeSet incompleteRpcs = new TreeSet<>(); + + static final long NO_SEQ_NO = -1; + + private final String clientId; + + /** + * Create a new request tracker for the given client id. + * @param clientId identifier for the client this tracker belongs to + */ + public RequestTracker(String clientId) { + this.clientId = clientId; + } + + /** + * Generates a new sequence number and tracks it. + * @return a new sequence number + */ + public long newSeqNo() { + synchronized (lock) { + long seq = nextSeqNo++; + incompleteRpcs.add(seq); + return seq; + } + } + + /** + * Returns the oldest sequence number that wasn't marked as completed. If there is no incomplete + * RPC then {@link RequestTracker#NO_SEQ_NO} is returned. + * @return the first incomplete sequence number + */ + public long firstIncomplete() { + synchronized (lock) { + if (incompleteRpcs.isEmpty()) { + return NO_SEQ_NO; + } + return incompleteRpcs.first(); + } + } + + /** + * Marks the given sequence id as complete. The provided sequence ID must be a valid + * number that was previously returned by {@link #newSeqNo()}. It is illegal to call + * this method twice with the same sequence number. 
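// ---------------------------------------------------------------------------
// Illustrative sketch (editor's example, not part of this patch): how the
// tracker above is meant to be driven. The client id is hypothetical.
RequestTracker tracker = new RequestTracker("0123-client-id");
long seqNo = tracker.newSeqNo();          // registered as incomplete
long oldest = tracker.firstIncomplete();  // == seqNo while the RPC is in flight
tracker.rpcCompleted(seqNo);              // must be called exactly once per seqNo
assert tracker.firstIncomplete() == RequestTracker.NO_SEQ_NO;
// ---------------------------------------------------------------------------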
+ * @param sequenceId the sequence id to mark as complete + */ + public void rpcCompleted(long sequenceId) { + assert sequenceId != NO_SEQ_NO; + synchronized (lock) { + boolean removed = incompleteRpcs.remove(sequenceId); + assert (removed) : "Could not remove seqid " + sequenceId + " from request tracker"; + } + } + + public String getClientId() { + return clientId; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ResourceMetrics.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ResourceMetrics.java new file mode 100644 index 0000000000..1e7e7cb225 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ResourceMetrics.java @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static com.google.protobuf.Descriptors.FieldDescriptor.JavaType; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.LongAdder; +import java.util.stream.Collectors; + +import com.google.common.base.Preconditions; +import com.google.protobuf.Descriptors.FieldDescriptor; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.tserver.Tserver.ResourceMetricsPB; + +/** + * A container for scanner resource metrics. + *
<p>
+ * This class wraps a mapping from metric name to metric value for server-side + * metrics associated with a scanner and write operation. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ResourceMetrics { + private Map metrics = new ConcurrentHashMap<>(); + + /** + * Returns a copy of this ResourceMetrics's underlying map of metric name to + * metric value. + * @return a map of metric name to metric value + */ + public Map get() { + return metrics.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().sum())); + } + + /** + * Returns the value of the metric named by 'name', or 0 if there is no such metric. + * @param name the name of the metric to get the value for + * @return the value of the named metric; if the metric is not found, returns 0 + */ + public long getMetric(String name) { + return metrics.getOrDefault(name, new LongAdder()).sum(); + } + + /** + * Increment this instance's metric values with those found in 'resourceMetricsPb'. + * @param resourceMetricsPb resource metrics protobuf object to be used to update this object + */ + void update(ResourceMetricsPB resourceMetricsPb) { + Preconditions.checkNotNull(resourceMetricsPb); + for (Map.Entry entry : resourceMetricsPb.getAllFields().entrySet()) { + FieldDescriptor field = entry.getKey(); + if (field.getJavaType() == JavaType.LONG) { + increment(field.getName(), (Long) entry.getValue()); + } + } + } + + /** + * Increment this instance's metric values with those found in 'resourceMetrics'. + * Noop if 'resourceMetrics' is null. + * @param resourceMetrics resource metrics protobuf object to be used to update this object. + * Can be null, which will not do anything. + */ + void update(ResourceMetrics resourceMetrics) { + if (resourceMetrics != null) { + for (Map.Entry entry : resourceMetrics.metrics.entrySet()) { + increment(entry.getKey(), entry.getValue().sum()); + } + } + } + + /** + * Increment the metric value by the specific amount. + * @param name the name of the metric whose value is to be incremented + * @param amount the amount to increment the value by + */ + private void increment(String name, long amount) { + metrics.computeIfAbsent(name, k -> new LongAdder()).add(amount); + } + + /** + * Converts a ResourceMetricsPB into a ResourceMetrics. + * @param resourceMetricsPb a resource metrics in its PB format. Must not be null. + * @return a ResourceMetrics + */ + static ResourceMetrics fromResourceMetricsPB(ResourceMetricsPB resourceMetricsPb) { + Preconditions.checkNotNull(resourceMetricsPb); + ResourceMetrics result = new ResourceMetrics(); + result.update(resourceMetricsPb); + return result; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowError.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowError.java new file mode 100644 index 0000000000..ef6bd0aa32 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowError.java @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
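// ---------------------------------------------------------------------------
// Illustrative sketch (editor's example, not part of this patch): reading values
// out of a ResourceMetrics instance, however it was obtained (e.g. from a scan
// or write response). The metric name is hypothetical; getMetric() returns 0
// for metrics the server did not report.
ResourceMetrics metrics = new ResourceMetrics();
long cacheHits = metrics.getMetric("cfile_cache_hit_bytes");
Map<String, Long> snapshot = metrics.get();
// ---------------------------------------------------------------------------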
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.WireProtocol; +import org.apache.kudu.tserver.Tserver; + +/** + * Wrapper class for a single row error. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class RowError { + private final Status status; + private final Operation operation; + private final String tsUUID; + + /** + * Creates a new {@code RowError} with the provided status, operation, and tablet server UUID. + */ + RowError(Status status, Operation operation, String tsUUID) { + this.status = status; + this.operation = operation; + this.tsUUID = tsUUID; + } + + /** + * Creates a new {@code RowError} with the provided status, and operation. + * + * This constructor should be used when the operation fails before the tablet + * lookup is complete. + */ + RowError(Status status, Operation operation) { + this(status, operation, null); + } + + /** + * Get the status code and message of the row error. + */ + public Status getErrorStatus() { + return status; + } + + /** + * Get the string-representation of the error code that the tablet server returned. + * @return A short string representation of the error. + * @deprecated Please use getErrorStatus() instead. Will be removed in a future version. + */ + @Deprecated + public String getStatus() { + return status.getCodeName(); + } + + /** + * Get the error message the tablet server sent. + * @return The error message. + * @deprecated Please use getErrorStatus() instead. Will be removed in a future version. + */ + @Deprecated + public String getMessage() { + return status.getMessage(); + } + + /** + * Get the Operation that failed. + * @return The same Operation instance that failed + */ + public Operation getOperation() { + return operation; + } + + /** + * Get the identifier of the tablet server that sent the error. + * The UUID may be {@code null} if the failure occurred before sending the row + * to a tablet server (for instance, if the row falls in a non-covered range partition). + * @return A string containing a UUID + */ + public String getTsUUID() { + return tsUUID; + } + + @Override + public String toString() { + // Intentionally not redacting the row key to make this more useful. + return "Row error for row=" + operation.getRow() + + ", tablet=" + operation.getTablet() + + ", server=" + tsUUID + + ", status=" + status.toString(); + } + + /** + * Converts a PerRowErrorPB into a RowError. 
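// ---------------------------------------------------------------------------
// Illustrative sketch (editor's example, not part of this patch): draining
// buffered row errors from a KuduSession in AUTO_FLUSH_BACKGROUND mode. The
// master address is hypothetical; checked-exception handling is elided.
KuduClient client = new KuduClient.KuduClientBuilder("master-1:7051").build();
KuduSession session = client.newSession();
session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND);
// ... apply some operations, then periodically check for errors ...
RowErrorsAndOverflowStatus pending = session.getPendingErrors();
if (pending.isOverflowed()) {
  System.err.println("error collector overflowed; some row errors were dropped");
}
for (RowError error : pending.getRowErrors()) {
  System.err.println("row error: " + error.getErrorStatus() +
      " from server " + error.getTsUUID());
}
// ---------------------------------------------------------------------------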
+ * @param errorPB a row error in its pb format + * @param operation the original operation + * @param tsUUID a string containing the originating TS's UUID + * @return a row error + */ + static RowError fromRowErrorPb(Tserver.WriteResponsePB.PerRowErrorPB errorPB, + Operation operation, String tsUUID) { + WireProtocol.AppStatusPB statusPB = errorPB.getError(); + return new RowError(Status.fromPB(statusPB), operation, tsUUID); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowErrorsAndOverflowStatus.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowErrorsAndOverflowStatus.java new file mode 100644 index 0000000000..31cae5bc7b --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowErrorsAndOverflowStatus.java @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Container class used as a response when retrieving pending row errors. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class RowErrorsAndOverflowStatus { + private final RowError[] rowErrors; + private final boolean overflowed; + + RowErrorsAndOverflowStatus(RowError[] rowErrors, boolean overflowed) { + this.rowErrors = rowErrors; + this.overflowed = overflowed; + } + + /** + * Get the collected row errors. + * @return an array of row errors, may be empty + */ + public RowError[] getRowErrors() { + return rowErrors; + } + + /** + * Check if the error collector had an overflow and had to discard row errors. + * @return true if row errors were discarded, false otherwise + */ + public boolean isOverflowed() { + return overflowed; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("rowErrors size: ").append(rowErrors.length); + sb.append(", rowErrors: ["); + if (rowErrors.length > 0) { + sb.append(rowErrors[0].toString()); + for (int i = 1; i < rowErrors.length; i++) { + sb.append(", ").append(rowErrors[i].toString()); + } + } + sb.append("], overflowed: ").append(overflowed); + return sb.toString(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java new file mode 100644 index 0000000000..b3e81cfc8c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java @@ -0,0 +1,669 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.TimestampUtil; + +/** + * RowResult represents one row from a scanner. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class RowResult { + + protected static final int INDEX_RESET_LOCATION = -1; + protected int index = INDEX_RESET_LOCATION; + + protected final Schema schema; + + /** + * Prepares the row representation using the provided data. Doesn't copy data + * out of the byte arrays. Package private. + * @param schema Schema used to build the rowData + * @param rowIndex The index of the row in the rowData that this RowResult represents + */ + RowResult(Schema schema, int rowIndex) { + this.schema = schema; + this.index = rowIndex; + } + + void resetPointer() { + advancePointerTo(INDEX_RESET_LOCATION); + } + + /** + * Package-protected, only meant to be used by the RowResultIterator + */ + void advancePointerTo(int rowIndex) { + this.index = rowIndex; + } + + /** + * Get the specified column's integer + * @param columnName name of the column to get data for + * @return an integer + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + */ + public final int getInt(String columnName) { + return getInt(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's integer + * @param columnIndex Column index in the schema + * @return an integer + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract int getInt(int columnIndex); + + /** + * Get the specified column's short + * @param columnName name of the column to get data for + * @return a short + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + */ + public final short getShort(String columnName) { + return getShort(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's short + * @param columnIndex Column index in the schema + * @return a short + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract short getShort(int columnIndex); + + /** + * Get the specified column's boolean + * @param columnName 
name of the column to get data for + * @return a boolean + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + */ + public final boolean getBoolean(String columnName) { + return getBoolean(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's boolean + * @param columnIndex Column index in the schema + * @return a boolean + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract boolean getBoolean(int columnIndex); + + /** + * Get the specified column's byte + * @param columnName name of the column to get data for + * @return a byte + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + */ + public final byte getByte(String columnName) { + return getByte(this.schema.getColumnIndex(columnName)); + + } + + /** + * Get the specified column's byte + * @param columnIndex Column index in the schema + * @return a byte + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract byte getByte(int columnIndex); + + /** + * Get the specified column's long + * + * If this is a UNIXTIME_MICROS column, the long value corresponds to a number of microseconds + * since midnight, January 1, 1970 UTC. + * + * @param columnName name of the column to get data for + * @return a positive long + * @throws IllegalArgumentException if the column doesn't exist or is null + */ + public final long getLong(String columnName) { + return getLong(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's long + * + * If this is a UNIXTIME_MICROS column, the long value corresponds to a number of microseconds + * since midnight, January 1, 1970 UTC. 
+ * + * @param columnIndex Column index in the schema + * @return a positive long + * @throws IllegalArgumentException if the column is null + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract long getLong(int columnIndex); + + /** + * Get the specified column's float + * @param columnName name of the column to get data for + * @return a float + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + */ + public final float getFloat(String columnName) { + return getFloat(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's float + * @param columnIndex Column index in the schema + * @return a float + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract float getFloat(int columnIndex); + + /** + * Get the specified column's double + * @param columnName name of the column to get data for + * @return a double + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + */ + public final double getDouble(String columnName) { + return getDouble(this.schema.getColumnIndex(columnName)); + + } + + /** + * Get the specified column's double + * @param columnIndex Column index in the schema + * @return a double + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract double getDouble(int columnIndex); + + /** + * Get the specified column's Decimal. + * + * @param columnName name of the column to get data for + * @return a BigDecimal + * @throws IllegalArgumentException if the column doesn't exist or is null + */ + public final BigDecimal getDecimal(String columnName) { + return getDecimal(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's Decimal. + * + * @param columnIndex Column index in the schema + * @return a BigDecimal. + * @throws IllegalArgumentException if the column is null + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract BigDecimal getDecimal(int columnIndex); + + /** + * Get the specified column's Timestamp. + * + * @param columnName name of the column to get data for + * @return a Timestamp + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public final Timestamp getTimestamp(String columnName) { + return getTimestamp(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's Timestamp. + * + * @param columnIndex Column index in the schema + * @return a Timestamp + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract Timestamp getTimestamp(int columnIndex); + + /** + * Get the specified column's Date. + * + * @param columnName name of the column to get data for + * @return a Date + * @throws IllegalArgumentException if the column doesn't exist, + * is null, is unset, or the type doesn't match the column's type + */ + public final Date getDate(String columnName) { + return getDate(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's Date. 
+ * + * @param columnIndex Column index in the schema + * @return a Date + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final Date getDate(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.DATE); + int days = getInt(columnIndex); + return DateUtil.epochDaysToSqlDate(days); + } + + /** + * Get the schema used for this scanner's column projection. + * @return a column projection as a schema. + */ + public final Schema getColumnProjection() { + return this.schema; + } + + /** + * Get the specified column's string. + * @param columnName name of the column to get data for + * @return a string + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + */ + public final String getString(String columnName) { + return getString(this.schema.getColumnIndex(columnName)); + + } + + /** + * Get the specified column's string. + * @param columnIndex Column index in the schema + * @return a string + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final String getString(int columnIndex) { + checkType(columnIndex, Type.STRING); + return getVarLengthData(columnIndex); + } + + protected abstract String getVarLengthData(int columnIndex); + + /** + * Get the specified column's varchar. + * @param columnIndex Column index in the schema + * @return a string + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final String getVarchar(int columnIndex) { + checkType(columnIndex, Type.VARCHAR); + return getVarLengthData(columnIndex); + } + + /** + * Get the specified column's varchar. + * @param columnName name of the column to get data for + * @return a string + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + */ + public final String getVarchar(String columnName) { + return getVarchar(this.schema.getColumnIndex(columnName)); + } + + /** + * Get a copy of the specified column's binary data. + * @param columnName name of the column to get data for + * @return a byte[] with the binary data. + * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final byte[] getBinaryCopy(String columnName) { + return getBinaryCopy(this.schema.getColumnIndex(columnName)); + + } + + /** + * Get a copy of the specified column's binary data. + * @param columnIndex Column index in the schema + * @return a byte[] with the binary data. + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract byte[] getBinaryCopy(int columnIndex); + + /** + * Get the specified column's binary data. + * + * This doesn't copy the data and instead returns a ByteBuffer that wraps it. + * + * @param columnName name of the column to get data for + * @return a ByteBuffer with the binary data. 
+ * @throws IllegalArgumentException if the column doesn't exist, is null, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final ByteBuffer getBinary(String columnName) { + return getBinary(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's binary data. + * + * This doesn't copy the data and instead returns a ByteBuffer that wraps it. + * + * @param columnIndex Column index in the schema + * @return a ByteBuffer with the binary data. + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract ByteBuffer getBinary(int columnIndex); + + /** + * Get if the specified column is NULL + * @param columnName name of the column in the schema + * @return true if the column cell is null and the column is nullable, + * false otherwise + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final boolean isNull(String columnName) { + return isNull(this.schema.getColumnIndex(columnName)); + } + + /** + * Get if the specified column is NULL + * @param columnIndex Column index in the schema + * @return true if the column cell is null and the column is nullable, + * false otherwise + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public abstract boolean isNull(int columnIndex); + + /** + * Get the specified column's value as an Object. + * + * This method is useful when you don't care about autoboxing + * and your existing type handling logic is based on Java types. + * + * The Object type is based on the column's {@link Type}: + * Type.BOOL -> java.lang.Boolean + * Type.INT8 -> java.lang.Byte + * Type.INT16 -> java.lang.Short + * Type.INT32 -> java.lang.Integer + * Type.INT64 -> java.lang.Long + * Type.UNIXTIME_MICROS -> java.sql.Timestamp + * Type.FLOAT -> java.lang.Float + * Type.DOUBLE -> java.lang.Double + * Type.VARCHAR -> java.lang.String + * Type.STRING -> java.lang.String + * Type.BINARY -> byte[] + * Type.DECIMAL -> java.math.BigDecimal + * + * @param columnName name of the column in the schema + * @return the column's value as an Object, null if the value is null + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final Object getObject(String columnName) { + return getObject(this.schema.getColumnIndex(columnName)); + } + + /** + * Get the specified column's value as an Object. + * + * This method is useful when you don't care about autoboxing + * and your existing type handling logic is based on Java types. 
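+   * For example (illustrative only, assuming a {@code KuduScanner} over a table with an
+   * INT32 column named "key" and a STRING column named "name"):
+   * <pre>{@code
+   *   RowResultIterator rows = scanner.nextRows();
+   *   for (RowResult row : rows) {
+   *     Integer key = (Integer) row.getObject("key");    // INT32 maps to java.lang.Integer
+   *     String name = (String) row.getObject("name");    // STRING maps to java.lang.String
+   *   }
+   * }</pre>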
+ * + * The Object type is based on the column's {@link Type}: + * Type.BOOL -> java.lang.Boolean + * Type.INT8 -> java.lang.Byte + * Type.INT16 -> java.lang.Short + * Type.INT32 -> java.lang.Integer + * Type.INT64 -> java.lang.Long + * Type.UNIXTIME_MICROS -> java.sql.Timestamp + * Type.FLOAT -> java.lang.Float + * Type.DOUBLE -> java.lang.Double + * Type.VARCHAR -> java.lang.String + * Type.STRING -> java.lang.String + * Type.BINARY -> byte[] + * Type.DECIMAL -> java.math.BigDecimal + * Type.Date -> java.sql.Date + * + * @param columnIndex Column index in the schema + * @return the column's value as an Object, null if the value is null + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final Object getObject(int columnIndex) { + checkValidColumn(columnIndex); + if (isNull(columnIndex)) { + return null; + } + Type type = schema.getColumnByIndex(columnIndex).getType(); + switch (type) { + case BOOL: return getBoolean(columnIndex); + case INT8: return getByte(columnIndex); + case INT16: return getShort(columnIndex); + case INT32: return getInt(columnIndex); + case INT64: return getLong(columnIndex); + case DATE: return getDate(columnIndex); + case UNIXTIME_MICROS: return getTimestamp(columnIndex); + case FLOAT: return getFloat(columnIndex); + case DOUBLE: return getDouble(columnIndex); + case VARCHAR: return getVarchar(columnIndex); + case STRING: return getString(columnIndex); + case BINARY: return getBinaryCopy(columnIndex); + case DECIMAL: return getDecimal(columnIndex); + default: throw new UnsupportedOperationException("Unsupported type: " + type); + } + } + + /** + * @return true if the RowResult has the IS_DELETED virtual column + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public final boolean hasIsDeleted() { + return schema.hasIsDeleted(); + } + + /** + * @return the value of the IS_DELETED virtual column + * @throws IllegalStateException if no IS_DELETED virtual column exists + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public final boolean isDeleted() { + return getBoolean(schema.getIsDeletedIndex()); + } + + /** + * Get the type of a column in this result. + * @param columnName name of the column + * @return a type + */ + public final Type getColumnType(String columnName) { + return this.schema.getColumn(columnName).getType(); + } + + /** + * Get the type of a column in this result. + * @param columnIndex column index in the schema + * @return a type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + public final Type getColumnType(int columnIndex) { + return this.schema.getColumnByIndex(columnIndex).getType(); + } + + /** + * Get the schema associated with this result. 
+ * @return a schema + */ + public final Schema getSchema() { + return schema; + } + + /** + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + protected final void checkValidColumn(int columnIndex) { + if (columnIndex >= schema.getColumnCount()) { + throw new IndexOutOfBoundsException("Requested column is out of range, " + + columnIndex + " out of " + schema.getColumnCount()); + } + } + + /** + * @throws IllegalArgumentException if the column is null + */ + protected final void checkNull(int columnIndex) { + if (!schema.hasNullableColumns()) { + return; + } + if (isNull(columnIndex)) { + ColumnSchema columnSchema = schema.getColumnByIndex(columnIndex); + throw new IllegalArgumentException("The requested column (name: " + columnSchema.getName() + + ", index: " + columnIndex + ") is null"); + } + } + + protected final void checkType(int columnIndex, Type... types) { + ColumnSchema columnSchema = schema.getColumnByIndex(columnIndex); + Type columnType = columnSchema.getType(); + for (Type type : types) { + if (columnType.equals(type)) { + return; + } + } + throw new IllegalArgumentException("Column (name: " + columnSchema.getName() + + ", index: " + columnIndex + ") is of type " + + columnType.getName() + " but was requested as a type " + Arrays.toString(types)); + } + + /** + * Return the actual data from this row in a stringified key=value + * form. + */ + public String rowToString() { + StringBuilder buf = new StringBuilder(); + for (int i = 0; i < schema.getColumnCount(); i++) { + ColumnSchema col = schema.getColumnByIndex(i); + if (i != 0) { + buf.append(", "); + } + Type type = col.getType(); + buf.append(type.name()); + buf.append(" ").append(col.getName()); + if (col.getTypeAttributes() != null) { + buf.append(col.getTypeAttributes().toStringForType(type)); + } + buf.append("="); + if (isNull(i)) { + buf.append("NULL"); + } else { + switch (col.getType()) { + case INT8: + buf.append(getByte(i)); + break; + case INT16: + buf.append(getShort(i)); + break; + case INT32: + buf.append(getInt(i)); + break; + case INT64: + buf.append(getLong(i)); + break; + case DATE: + buf.append(DateUtil.epochDaysToDateString(getInt(i))); + break; + case UNIXTIME_MICROS: { + buf.append(TimestampUtil.timestampToString(getTimestamp(i))); + } break; + case VARCHAR: + buf.append(getVarchar(i)); + break; + case STRING: + buf.append(getString(i)); + break; + case BINARY: + buf.append(Bytes.pretty(getBinaryCopy(i))); + break; + case FLOAT: + buf.append(getFloat(i)); + break; + case DOUBLE: + buf.append(getDouble(i)); + break; + case DECIMAL: + buf.append(getDecimal(i)); + break; + case BOOL: + buf.append(getBoolean(i)); + break; + default: + buf.append(""); + break; + } + } + } + return buf.toString(); + } + + /** + * @return a string describing the location of this row result within + * the iterator as well as its data. 
+ */ + public String toStringLongFormat() { + StringBuilder buf = new StringBuilder(); + buf.append(this.toString()); + buf.append("{"); + buf.append(rowToString()); + buf.append("}"); + return buf.toString(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowResultIterator.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowResultIterator.java new file mode 100644 index 0000000000..8c3cf05d51 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowResultIterator.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.Iterator; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Schema; + +/** + * Class that contains the rows sent by a tablet server, exhausting this iterator only means + * that all the rows from the last server response were read. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +@SuppressWarnings("IterableAndIterator") +public abstract class RowResultIterator extends KuduRpcResponse implements Iterator, + Iterable { + + protected final Schema schema; + protected final int numRows; + protected int currentRow = 0; + + /** + * Package private constructor, only meant to be instantiated from AsyncKuduScanner. + * @param elapsedMillis time in milliseconds since RPC creation to now + * @param tsUUID UUID of the tablet server that handled our request + * @param schema schema used to parse the rows + * @param numRows how many rows are contained in the bs slice + */ + RowResultIterator(long elapsedMillis, + String tsUUID, + Schema schema, + int numRows) { + super(elapsedMillis, tsUUID); + this.schema = schema; + this.numRows = numRows; + } + + public int getNumRows() { + return this.numRows; + } + + public static RowResultIterator empty() { + return RowwiseRowResultIterator.empty(); + } + + @Override + public boolean hasNext() { + return this.currentRow < numRows; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public Iterator iterator() { + return this; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowwiseRowResult.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowwiseRowResult.java new file mode 100644 index 0000000000..aa54fd3a67 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowwiseRowResult.java @@ -0,0 +1,372 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.sql.Timestamp; +import java.util.BitSet; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.util.Slice; +import org.apache.kudu.util.TimestampUtil; + +/** + * RowResult represents one row from a scanner, in row-wise layout. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +class RowwiseRowResult extends RowResult { + + private final Slice rowData; + private final Slice indirectData; + + private final int rowSize; + private final int[] columnOffsets; + + private int offset; + private BitSet nullsBitSet; + + /** + * Prepares the row representation using the provided data. Doesn't copy data + * out of the byte arrays. Package private. + * @param schema Schema used to build the rowData + * @param rowData The Slice of data returned by the tablet server + * @param indirectData The full indirect data that contains the strings + * @param rowIndex The index of the row in the rowData that this RowResult represents + */ + RowwiseRowResult(Schema schema, Slice rowData, Slice indirectData, int rowIndex) { + super(schema, rowIndex); + this.rowData = rowData; + this.indirectData = indirectData; + this.rowSize = this.schema.getRowSize(); + + int columnOffsetsSize = schema.getColumnCount(); + if (schema.hasNullableColumns()) { + columnOffsetsSize++; + } + columnOffsets = new int[columnOffsetsSize]; + // Empty projection, usually used for quick row counting. + if (columnOffsetsSize == 0) { + return; + } + int currentOffset = 0; + columnOffsets[0] = currentOffset; + // Pre-compute the columns offsets in rowData for easier lookups later. + // If the schema has nullables, we also add the offset for the null bitmap at the end. 
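+    // Each columnOffsets[i] holds the byte offset of column i within a single row of rowData,
+    // i.e. the running sum of the preceding columns' fixed on-wire type sizes. When the schema
+    // has nullable columns, the extra trailing entry is the offset of the per-row null bitmap
+    // consumed by advancePointerTo().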
+ for (int i = 1; i < columnOffsetsSize; i++) { + org.apache.kudu.ColumnSchema column = schema.getColumnByIndex(i - 1); + int previousSize = column.getTypeSize(); + columnOffsets[i] = previousSize + currentOffset; + currentOffset += previousSize; + } + advancePointerTo(rowIndex); + } + + /** + * Package-protected, only meant to be used by the RowResultIterator + */ + @Override + void advancePointerTo(int rowIndex) { + super.advancePointerTo(rowIndex); + + this.offset = this.rowSize * this.index; + if (schema.hasNullableColumns() && this.index != INDEX_RESET_LOCATION) { + this.nullsBitSet = Bytes.toBitSet( + this.rowData.getRawArray(), + this.rowData.getRawOffset() + + getCurrentRowDataOffsetForColumn(schema.getColumnCount()), + schema.getColumnCount()); + } + } + + int getCurrentRowDataOffsetForColumn(int columnIndex) { + return this.offset + this.columnOffsets[columnIndex]; + } + + /** + * Get the specified column's integer + * @param columnIndex Column index in the schema + * @return an integer + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public int getInt(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.INT32, Type.DATE); + return Bytes.getInt(this.rowData.getRawArray(), + this.rowData.getRawOffset() + getCurrentRowDataOffsetForColumn(columnIndex)); + } + + /** + * Get the specified column's short + * @param columnIndex Column index in the schema + * @return a short + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public short getShort(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.INT16); + return Bytes.getShort(this.rowData.getRawArray(), + this.rowData.getRawOffset() + getCurrentRowDataOffsetForColumn(columnIndex)); + } + + /** + * Get the specified column's boolean + * @param columnIndex Column index in the schema + * @return a boolean + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public boolean getBoolean(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.BOOL); + byte b = Bytes.getByte(this.rowData.getRawArray(), + this.rowData.getRawOffset() + + getCurrentRowDataOffsetForColumn(columnIndex)); + return b == 1; + } + + /** + * Get the specified column's byte + * @param columnIndex Column index in the schema + * @return a byte + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public byte getByte(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.INT8); + return Bytes.getByte(this.rowData.getRawArray(), + this.rowData.getRawOffset() + getCurrentRowDataOffsetForColumn(columnIndex)); + } + + /** + * Get the specified column's long + * + * If this is a UNIXTIME_MICROS column, the long value corresponds to a number of microseconds + * since midnight, January 1, 1970 UTC. 
+ * + * @param columnIndex Column index in the schema + * @return a positive long + * @throws IllegalArgumentException if the column is null + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public long getLong(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.INT64, Type.UNIXTIME_MICROS); + return Bytes.getLong(this.rowData.getRawArray(), + this.rowData.getRawOffset() + getCurrentRowDataOffsetForColumn(columnIndex)); + } + + /** + * Get the specified column's float + * @param columnIndex Column index in the schema + * @return a float + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public float getFloat(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.FLOAT); + return Bytes.getFloat(this.rowData.getRawArray(), + this.rowData.getRawOffset() + + getCurrentRowDataOffsetForColumn(columnIndex)); + } + + /** + * Get the specified column's double + * @param columnIndex Column index in the schema + * @return a double + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public double getDouble(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.DOUBLE); + return Bytes.getDouble(this.rowData.getRawArray(), + this.rowData.getRawOffset() + + getCurrentRowDataOffsetForColumn(columnIndex)); + } + + /** + * Get the specified column's Decimal. + * + * @param columnIndex Column index in the schema + * @return a BigDecimal. + * @throws IllegalArgumentException if the column is null + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public BigDecimal getDecimal(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.DECIMAL); + ColumnSchema column = schema.getColumnByIndex(columnIndex); + ColumnTypeAttributes typeAttributes = column.getTypeAttributes(); + return Bytes.getDecimal(this.rowData.getRawArray(), + this.rowData.getRawOffset() + getCurrentRowDataOffsetForColumn(columnIndex), + typeAttributes.getPrecision(), typeAttributes.getScale()); + } + + /** + * Get the specified column's Timestamp. + * + * @param columnIndex Column index in the schema + * @return a Timestamp + * @throws IllegalArgumentException if the column is null, is unset, + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public Timestamp getTimestamp(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.UNIXTIME_MICROS); + long micros = getLong(columnIndex); + return TimestampUtil.microsToTimestamp(micros); + } + + @Override + public String getVarLengthData(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.STRING, Type.VARCHAR); + // C++ puts a Slice in rowData which is 16 bytes long for simplicity, but we only support ints. 
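+    // Of those 16 bytes, the first 8 are the value's offset into indirectData and the next 8
+    // are its length; both are read as longs (see getOffset()) and narrowed to int below.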
+ long offset = getOffset(columnIndex); + long length = rowData.getLong(getCurrentRowDataOffsetForColumn(columnIndex) + 8); + assert offset < Integer.MAX_VALUE; + assert length < Integer.MAX_VALUE; + return Bytes.getString(indirectData.getRawArray(), + indirectData.getRawOffset() + (int)offset, + (int)length); + } + + /** + * Get a copy of the specified column's binary data. + * @param columnIndex Column index in the schema + * @return a byte[] with the binary data. + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public byte[] getBinaryCopy(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + // C++ puts a Slice in rowData which is 16 bytes long for simplicity, + // but we only support ints. + long offset = getOffset(columnIndex); + long length = rowData.getLong(getCurrentRowDataOffsetForColumn(columnIndex) + 8); + assert offset < Integer.MAX_VALUE; + assert length < Integer.MAX_VALUE; + byte[] ret = new byte[(int)length]; + System.arraycopy(indirectData.getRawArray(), indirectData.getRawOffset() + (int) offset, + ret, 0, (int) length); + return ret; + } + + /** + * Get the specified column's binary data. + * + * This doesn't copy the data and instead returns a ByteBuffer that wraps it. + * + * @param columnIndex Column index in the schema + * @return a ByteBuffer with the binary data. + * @throws IllegalArgumentException if the column is null + * or if the type doesn't match the column's type + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public ByteBuffer getBinary(int columnIndex) { + checkValidColumn(columnIndex); + checkNull(columnIndex); + checkType(columnIndex, Type.BINARY); + // C++ puts a Slice in rowData which is 16 bytes long for simplicity, + // but we only support ints. + long offset = getOffset(columnIndex); + long length = rowData.getLong(getCurrentRowDataOffsetForColumn(columnIndex) + 8); + assert offset < Integer.MAX_VALUE; + assert length < Integer.MAX_VALUE; + return ByteBuffer.wrap(indirectData.getRawArray(), indirectData.getRawOffset() + (int) offset, + (int) length); + } + + /** + * Returns the long column value if the column type is INT64 or UNIXTIME_MICROS. + * Returns the column's offset into the indirectData if the column type is BINARY or STRING. 
+ * @param columnIndex Column index in the schema + * @return a long value for the column + */ + long getOffset(int columnIndex) { + return Bytes.getLong(this.rowData.getRawArray(), + this.rowData.getRawOffset() + + getCurrentRowDataOffsetForColumn(columnIndex)); + } + + /** + * Get if the specified column is NULL + * @param columnIndex Column index in the schema + * @return true if the column cell is null and the column is nullable, + * false otherwise + * @throws IndexOutOfBoundsException if the column doesn't exist + */ + @Override + public boolean isNull(int columnIndex) { + checkValidColumn(columnIndex); + if (nullsBitSet == null) { + return false; + } + return schema.getColumnByIndex(columnIndex).isNullable() && + nullsBitSet.get(columnIndex); + } + + + @Override + public String toString() { + return "RowResult(Rowwise) index: " + this.index + ", size: " + this.rowSize; + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowwiseRowResultIterator.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowwiseRowResultIterator.java new file mode 100644 index 0000000000..d2d9a0bb04 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RowwiseRowResultIterator.java @@ -0,0 +1,124 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.NoSuchElementException; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Schema; +import org.apache.kudu.WireProtocol; +import org.apache.kudu.util.Slice; + +/** + * Class that contains the rows in row-wise layout sent by a tablet server, + * exhausting this iterator only means that all the rows from the last server response were read. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +@SuppressWarnings("IterableAndIterator") +class RowwiseRowResultIterator extends RowResultIterator { + + private static final RowwiseRowResultIterator EMPTY = + new RowwiseRowResultIterator(0, null, null, 0, null, null, false); + + private final Slice bs; + private final Slice indirectBs; + private final RowResult sharedRowResult; + + /** + * Package private constructor, only meant to be instantiated from AsyncKuduScanner. 
+ * @param elapsedMillis time in milliseconds since RPC creation to now + * @param tsUUID UUID of the tablet server that handled our request + * @param schema schema used to parse the rows + * @param numRows how many rows are contained in the bs slice + * @param bs normal row data + * @param indirectBs indirect row data + * @param reuseRowResult reuse same row result for next row + */ + RowwiseRowResultIterator(long elapsedMillis, + String tsUUID, + Schema schema, + int numRows, + Slice bs, + Slice indirectBs, + boolean reuseRowResult) { + super(elapsedMillis, tsUUID, schema, numRows); + this.bs = bs; + this.indirectBs = indirectBs; + + this.sharedRowResult = (reuseRowResult && numRows != 0) ? + new RowwiseRowResult(this.schema, this.bs, this.indirectBs, -1) : null; + + } + + static RowwiseRowResultIterator makeRowResultIterator(long elapsedMillis, + String tsUUID, + Schema schema, + WireProtocol.RowwiseRowBlockPB data, + final CallResponse callResponse, + boolean reuseRowResult) + throws KuduException { + if (data == null || data.getNumRows() == 0) { + return new RowwiseRowResultIterator(elapsedMillis, tsUUID, schema, 0, + null, null, reuseRowResult); + } + + Slice bs = callResponse.getSidecar(data.getRowsSidecar()); + Slice indirectBs = callResponse.getSidecar(data.getIndirectDataSidecar()); + int numRows = data.getNumRows(); + + // Integrity check + int rowSize = schema.getRowSize(); + int expectedSize = numRows * rowSize; + if (expectedSize != bs.length()) { + Status statusIllegalState = Status.IllegalState("RowResult block has " + bs.length() + + " bytes of data but expected " + expectedSize + " for " + numRows + " rows"); + throw new NonRecoverableException(statusIllegalState); + } + return new RowwiseRowResultIterator(elapsedMillis, tsUUID, schema, numRows, + bs, indirectBs, reuseRowResult); + } + + /** + * @return an empty row result iterator + */ + public static RowwiseRowResultIterator empty() { + return EMPTY; + } + + @Override + public RowResult next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + // If sharedRowResult is not null, we should reuse it for every next call. + if (sharedRowResult != null) { + this.sharedRowResult.advancePointerTo(this.currentRow++); + return sharedRowResult; + } else { + return new RowwiseRowResult(this.schema, this.bs, this.indirectBs, this.currentRow++); + } + } + + @Override + public String toString() { + return "RowwiseRowResultIterator for " + this.numRows + " rows"; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcOutboundMessage.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcOutboundMessage.java new file mode 100644 index 0000000000..9fcd2212d9 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcOutboundMessage.java @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import com.google.protobuf.Message; +import com.google.protobuf.TextFormat; +import io.netty.buffer.ByteBuf; +import io.netty.channel.ChannelHandlerContext; +import io.netty.handler.codec.MessageToByteEncoder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.rpc.RpcHeader.RequestHeader; + +/** + * An RPC header and associated body protobuf which can be sent outbound + * through the Netty pipeline. The 'Encoder' inner class is responsible + * for serializing these instances into wire-format-compatible buffers. + */ +class RpcOutboundMessage { + private static final Logger LOG = LoggerFactory.getLogger(RpcOutboundMessage.class); + + private final RequestHeader.Builder headerBuilder; + private final Message body; + + RpcOutboundMessage(RequestHeader.Builder header, Message body) { + this.headerBuilder = header; + this.body = body; + } + + public RequestHeader.Builder getHeaderBuilder() { + return headerBuilder; + } + + public Message getBody() { + return body; + } + + @Override + public String toString() { + // TODO(todd): should this redact? it's only used at TRACE level, so hopefully OK. + return "RpcOutboundMessage[header={" + TextFormat.shortDebugString(headerBuilder) + + "}, body={" + TextFormat.shortDebugString(body) + "}]"; + } + + /** + * Netty encoder implementation to serialize outbound messages. + */ + static class Encoder extends MessageToByteEncoder { + + @Override + protected void encode(ChannelHandlerContext ctx, RpcOutboundMessage msg, ByteBuf out) { + if (LOG.isTraceEnabled()) { + LOG.trace("{}: sending RPC {}", ctx.channel(), msg); + } + // TODO(todd): move this impl into this class and remove external callers. + KuduRpc.toByteBuf(out, msg.getHeaderBuilder().build(), msg.getBody()); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcProxy.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcProxy.java new file mode 100644 index 0000000000..6d98e4a062 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcProxy.java @@ -0,0 +1,517 @@ +/* + * Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the StumbleUpon nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.kudu.client; + +import static org.apache.kudu.client.KuduMetrics.RPC_REQUESTS_METRIC; +import static org.apache.kudu.client.KuduMetrics.RPC_RESPONSE_METRIC; +import static org.apache.kudu.client.KuduMetrics.RPC_RETRIES_METRIC; +import static org.apache.kudu.client.KuduMetrics.counter; + +import java.util.Set; +import javax.annotation.Nonnull; + +import com.google.common.base.Preconditions; +import com.google.protobuf.Message; +import com.stumbleupon.async.Callback; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.WireProtocol; +import org.apache.kudu.master.Master; +import org.apache.kudu.rpc.RpcHeader; +import org.apache.kudu.rpc.RpcHeader.RpcFeatureFlag; +import org.apache.kudu.transactions.TxnManager; +import org.apache.kudu.tserver.Tserver; +import org.apache.kudu.util.Pair; + +/** + * This is a 'stateless' helper to send RPCs to a Kudu server ('stateless' in the sense that it + * does not keep any state itself besides the references to the {@link AsyncKuduClient} and + * {@link Connection} objects. + *
* <p>
+ * This helper serializes and de-serializes RPC requests and responses and provides handy + * methods to send the serialized RPC to the underlying {@link Connection} and to handle the + * response from it. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class RpcProxy { + + private static final Logger LOG = LoggerFactory.getLogger(RpcProxy.class); + + private static int staticNumFail = 0; + private static Exception staticException = null; + + /** The reference to the top-level Kudu client object. */ + @Nonnull + private final AsyncKuduClient client; + + /** The reference to the object representing connection to the target server. */ + @Nonnull + private final Connection connection; + + /** + * Construct RpcProxy object. + * + * @param client top-level Kudu client object + * @param connection the connection associated with the target Kudu server + */ + RpcProxy(AsyncKuduClient client, Connection connection) { + this.client = Preconditions.checkNotNull(client); + this.connection = Preconditions.checkNotNull(connection); + } + + /** + * Fails the next numFail RPCs by throwing the passed exception. + * @param numFail the number of RPCs to fail + * @param exception the exception to throw when failing an rpc + */ + @InterfaceAudience.LimitedPrivate("Test") + static void failNextRpcs(int numFail, Exception exception) { + Preconditions.checkNotNull(exception); + staticNumFail = numFail; + staticException = exception; + } + + /** + * Send the specified RPC using the connection to the Kudu server. + * + * @param type of the RPC + * @param rpc the RPC to send over the connection + */ + void sendRpc(final KuduRpc rpc) { + sendRpc(client, connection, rpc); + } + + /** + * Send the specified RPC using the connection to the Kudu server. + * + * @param type of the RPC + * @param client client object to handle response and sending retries, if needed + * @param connection connection to send the request over + * @param rpc the RPC to send over the connection + */ + static void sendRpc(final AsyncKuduClient client, + final Connection connection, + final KuduRpc rpc) { + counter(RPC_REQUESTS_METRIC, rpcTags(client, connection, rpc)).increment(); + if (rpc.attempt > 1) { + counter(RPC_RETRIES_METRIC, rpcTags(client, connection, rpc)).increment(); + } + try { + // Throw an exception to enable testing failures. See `failNextRpcs`. + if (staticNumFail > 0) { + staticNumFail--; + LOG.warn("Forcing a failure on sendRpc: {}", rpc); + throw staticException; + } + if (!rpc.getRequiredFeatures().isEmpty()) { + // An extra optimization: when the peer's features are already known, check that the server + // supports feature flags, if those are required. 
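+        // A null set here means the peer's supported features aren't known yet (connection
+        // negotiation hasn't reported them), so the client-side check below is skipped and
+        // enforcement of the required feature flags is left to the server.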
+ Set features = connection.getPeerFeatures(); + if (features != null && + !features.contains(RpcHeader.RpcFeatureFlag.APPLICATION_FEATURE_FLAGS)) { + throw new NonRecoverableException(Status.NotSupported( + "the server does not support the APPLICATION_FEATURE_FLAGS RPC feature")); + } + } + + Preconditions.checkArgument(rpc.hasDeferred()); + rpc.addTrace( + new RpcTraceFrame.RpcTraceFrameBuilder( + rpc.method(), + RpcTraceFrame.Action.SEND_TO_SERVER) + .serverInfo(connection.getServerInfo()) + .build()); + + connection.enqueueMessage(rpcToMessage(client, rpc), + new Callback() { + @Override + public Void call(Connection.CallResponseInfo callResponseInfo) throws Exception { + try { + responseReceived(client, connection, rpc, + callResponseInfo.response, callResponseInfo.exception); + } catch (Exception e) { + rpc.errback(e); + } + return null; + } + }); + } catch (RecoverableException e) { + // This is to handle RecoverableException(Status.IllegalState()) from + // Connection.enqueueMessage() if the connection turned into the TERMINATED state. + client.handleRetryableError(rpc, e); + } catch (Exception e) { + rpc.errback(e); + } + } + + /** + * Build {@link RpcOutboundMessage} out from {@link KuduRpc}. + * + * @param type of the RPC + * @param client client object to handle response and sending retries, if needed + * @param rpc the RPC to convert into outbound message + * @return the result {@link RpcOutboundMessage} + */ + private static RpcOutboundMessage rpcToMessage( + final AsyncKuduClient client, + final KuduRpc rpc) { + // The callId is set by Connection.enqueueMessage(). + final RpcHeader.RequestHeader.Builder headerBuilder = RpcHeader.RequestHeader.newBuilder() + .addAllRequiredFeatureFlags(rpc.getRequiredFeatures()) + .setRemoteMethod( + RpcHeader.RemoteMethodPB.newBuilder() + .setServiceName(rpc.serviceName()) + .setMethodName(rpc.method())); + // Before we create the request, get an authz token if needed. This is done + // regardless of whether the KuduRpc object already has a token; we may be + // a retrying due to an invalid token and the client may have a new token. + if (rpc.needsAuthzToken()) { + rpc.bindAuthzToken(client.getAuthzToken(rpc.getTable().getTableId())); + } + final Message reqPB = rpc.createRequestPB(); + // TODO(wdberkeley): We should enforce that every RPC has a timeout. 
+ if (rpc.timeoutTracker.hasTimeout()) { + headerBuilder.setTimeoutMillis((int) rpc.timeoutTracker.getMillisBeforeTimeout()); + } + if (rpc.isRequestTracked()) { + RpcHeader.RequestIdPB.Builder requestIdBuilder = RpcHeader.RequestIdPB.newBuilder(); + final RequestTracker requestTracker = client.getRequestTracker(); + if (rpc.getSequenceId() == RequestTracker.NO_SEQ_NO) { + rpc.setSequenceId(requestTracker.newSeqNo()); + } + requestIdBuilder.setClientId(requestTracker.getClientId()); + requestIdBuilder.setSeqNo(rpc.getSequenceId()); + requestIdBuilder.setAttemptNo(rpc.attempt); + requestIdBuilder.setFirstIncompleteSeqNo(requestTracker.firstIncomplete()); + headerBuilder.setRequestId(requestIdBuilder); + } + + return new RpcOutboundMessage(headerBuilder, reqPB); + } + + private static void responseReceived(AsyncKuduClient client, + Connection connection, + final KuduRpc rpc, + CallResponse response, + KuduException ex) { + final long start = System.nanoTime(); + if (LOG.isTraceEnabled()) { + if (response == null) { + LOG.trace("{} received null response for RPC {}", + connection.getLogPrefix(), rpc); + } else { + RpcHeader.ResponseHeader header = response.getHeader(); + LOG.trace("{} received response with rpcId {}, size {} for RPC {}", + connection.getLogPrefix(), header.getCallId(), + response.getTotalResponseSize(), rpc); + } + } + counter(RPC_RESPONSE_METRIC, rpcTags(client, connection, rpc)).increment(); + RpcTraceFrame.RpcTraceFrameBuilder traceBuilder = new RpcTraceFrame.RpcTraceFrameBuilder( + rpc.method(), RpcTraceFrame.Action.RECEIVE_FROM_SERVER).serverInfo( + connection.getServerInfo()); + if (ex != null) { + if (ex instanceof InvalidAuthnTokenException) { + client.handleInvalidAuthnToken(rpc); + return; + } + if (ex instanceof InvalidAuthzTokenException) { + client.handleInvalidAuthzToken(rpc, ex); + return; + } + if (ex instanceof RecoverableException) { + // This check is specifically for the ERROR_SERVER_TOO_BUSY, ERROR_UNAVAILABLE and alike. + failOrRetryRpc(client, connection, rpc, (RecoverableException) ex); + return; + } + rpc.addTrace(traceBuilder.callStatus(ex.getStatus()).build()); + rpc.errback(ex); + return; + } + + Pair decoded = null; + KuduException exception = null; + try { + decoded = rpc.deserialize(response, connection.getServerInfo().getUuid()); + } catch (KuduException e) { + exception = e; + } catch (Exception e) { + rpc.addTrace(traceBuilder.build()); + rpc.errback(e); + return; + } + + // We can get this Message from within the RPC's expected type, + // so convert it into an exception and nullify decoded so that we use the errback route. + // Have to do it for both TS and Master errors. + if (decoded != null && decoded.getSecond() != null) { + if (decoded.getSecond() instanceof Tserver.TabletServerErrorPB) { + Tserver.TabletServerErrorPB error = (Tserver.TabletServerErrorPB) decoded.getSecond(); + exception = dispatchTSError(client, connection, rpc, error, traceBuilder); + if (exception == null) { + // It was taken care of. + return; + } else { + // We're going to errback. + decoded = null; + } + } else if (decoded.getSecond() instanceof Master.MasterErrorPB) { + Master.MasterErrorPB error = (Master.MasterErrorPB) decoded.getSecond(); + exception = dispatchMasterError(client, connection, rpc, error, traceBuilder); + if (exception == null) { + // Exception was taken care of. 
+ return; + } else { + decoded = null; + } + } else if (decoded.getSecond() instanceof TxnManager.TxnManagerErrorPB) { + TxnManager.TxnManagerErrorPB error = + (TxnManager.TxnManagerErrorPB) decoded.getSecond(); + exception = dispatchTxnManagerError(client, rpc, error, traceBuilder); + if (exception == null) { + // Exception was taken care of. + return; + } else { + decoded = null; + } + } else { + rpc.addTrace(traceBuilder.build()); + exception = new NonRecoverableException(Status.NotSupported( + "unexpected error from server side: " + decoded.getSecond().toString())); + rpc.errback(exception); + return; + } + } + + try { + if (decoded != null) { + Preconditions.checkState(!(decoded.getFirst() instanceof Exception)); + if (client.isStatisticsEnabled()) { + rpc.updateStatistics(client.getStatistics(), decoded.getFirst()); + } + rpc.addTrace(traceBuilder.callStatus(Status.OK()).build()); + rpc.callback(decoded.getFirst()); + } else { + if (client.isStatisticsEnabled()) { + rpc.updateStatistics(client.getStatistics(), null); + } + rpc.addTrace(traceBuilder.callStatus(exception.getStatus()).build()); + rpc.errback(exception); + } + } catch (Exception e) { + RpcHeader.ResponseHeader header = response.getHeader(); + Preconditions.checkNotNull(header); + LOG.debug("{} unexpected exception {} while handling call: callId {}, RPC {}", + connection.getLogPrefix(), e, header.getCallId(), rpc); + } + if (LOG.isTraceEnabled()) { + LOG.trace("------------------<< LEAVING DECODE <<------------------ time elapsed: {} us", + ((System.nanoTime() - start) / 1000)); + } + } + + /** + * Takes care of a few kinds of TS errors that we handle differently, like tablets or leaders + * moving. Builds and returns an exception if we don't know what to do with it. + * + * @param client client object to handle response and sending retries, if needed + * @param connection connection to send the request over + * @param rpc the original RPC call that triggered the error + * @param error the error the TS sent + * @param tracer RPC trace builder to add a record on the error into the call history + * @return an exception if we couldn't dispatch the error, or null + */ + private static KuduException dispatchTSError(AsyncKuduClient client, + Connection connection, + KuduRpc rpc, + Tserver.TabletServerErrorPB error, + RpcTraceFrame.RpcTraceFrameBuilder tracer) { + Tserver.TabletServerErrorPB.Code errCode = error.getCode(); + WireProtocol.AppStatusPB.ErrorCode errStatusCode = error.getStatus().getCode(); + Status status = Status.fromTabletServerErrorPB(error); + if (errCode == Tserver.TabletServerErrorPB.Code.TABLET_NOT_FOUND || + errCode == Tserver.TabletServerErrorPB.Code.TABLET_NOT_RUNNING) { + // TODO(awong): for TABLET_NOT_FOUND, we may want to force a location + // lookup for the tablet. For now, this just invalidates the location + // and tries somewhere else. + client.handleTabletNotFound( + rpc, new RecoverableException(status), connection.getServerInfo()); + // we're not calling rpc.callback() so we rely on the client to retry that RPC + } else if (errStatusCode == WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE) { + client.handleRetryableError(rpc, new RecoverableException(status)); + // The following two error codes are an indication that the tablet isn't a leader. 
+ } else if (errStatusCode == WireProtocol.AppStatusPB.ErrorCode.ILLEGAL_STATE || + errStatusCode == WireProtocol.AppStatusPB.ErrorCode.ABORTED) { + client.handleNotLeader(rpc, new RecoverableException(status), connection.getServerInfo()); + } else { + return new NonRecoverableException(status); + } + rpc.addTrace(tracer.callStatus(status).build()); + return null; + } + + /** + * Provides different handling for various kinds of master errors: re-uses the + * mechanisms already in place for handling tablet server errors as much as possible. + * + * @param client client object to handle response and sending retries, if needed + * @param connection connection to send the request over + * @param rpc the original RPC call that triggered the error + * @param error the error the master sent + * @param tracer RPC trace builder to add a record on the error into the call history + * @return an exception if we couldn't dispatch the error, or null + */ + private static KuduException dispatchMasterError(AsyncKuduClient client, + Connection connection, + KuduRpc rpc, + Master.MasterErrorPB error, + RpcTraceFrame.RpcTraceFrameBuilder tracer) { + + WireProtocol.AppStatusPB.ErrorCode code = error.getStatus().getCode(); + Status status = Status.fromMasterErrorPB(error); + if (error.getCode() == Master.MasterErrorPB.Code.NOT_THE_LEADER) { + client.handleNotLeader(rpc, new RecoverableException(status), connection.getServerInfo()); + } else if (code == WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE) { + if (rpc instanceof ConnectToMasterRequest) { + // Special case: + // We never want to retry this RPC, we only use it to poke masters to learn where the leader + // is. If the error is truly non recoverable, it'll be handled later. + return new RecoverableException(status); + } else { + // TODO: This is a crutch until we either don't have to retry RPCs going to the + // same server or use retry policies. + client.handleRetryableError(rpc, new RecoverableException(status)); + } + } else { + return new NonRecoverableException(status); + } + rpc.addTrace(tracer.callStatus(status).build()); + return null; + } + + /** + * Handle for various kinds of TxnManager errors. As of now, only + * SERVICE_UNAVAILABLE is a re-triable error. + * + * @param client client object to handle response and sending retries, if needed + * @param connection connection to send the request over + * @param rpc the original RPC call that triggered the error + * @param pbError the error the master sent + * @param tracer RPC trace builder to add a record on the error into the call history + * @return an exception if we couldn't dispatch the error, or null + */ + private static KuduException dispatchTxnManagerError( + AsyncKuduClient client, + KuduRpc rpc, + TxnManager.TxnManagerErrorPB pbError, + RpcTraceFrame.RpcTraceFrameBuilder tracer) { + final WireProtocol.AppStatusPB.ErrorCode code = pbError.getStatus().getCode(); + final Status status = Status.fromTxnManagerErrorPB(pbError); + if (code != WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE) { + return new NonRecoverableException(status); + } + + // TODO(aserbin): try sending request to other TxnManager instance, + // if possible. The idea is that Kudu clusters are expected + // to have multiple masters, so if one TxnManager + // instance is not available, there is a high chance that + // others are still available (TxnManager is hosted by a + // kudu-master process). 
+ client.handleRetryableError(rpc, new RecoverableException(status)); + rpc.addTrace(tracer.callStatus(status).build()); + return null; + } + + /** + * Retry the given RPC. + * + * @param client client object to handle response and sending retries, if needed + * @param connection connection to send the request over + * @param rpc an RPC to retry or fail + * @param exception an exception to propagate with the RPC + */ + private static void failOrRetryRpc(AsyncKuduClient client, + Connection connection, + final KuduRpc rpc, + final RecoverableException exception) { + rpc.addTrace(new RpcTraceFrame.RpcTraceFrameBuilder(rpc.method(), + RpcTraceFrame.Action.RECEIVE_FROM_SERVER) + .serverInfo(connection.getServerInfo()) + .callStatus(exception.getStatus()) + .build()); + + RemoteTablet tablet = rpc.getTablet(); + // Note: As of the time of writing (03/11/16), a null tablet doesn't make sense, if we see a + // null tablet it's because we didn't set it properly before calling sendRpc(). + if (tablet == null) { // Can't retry, dunno where this RPC should go. + rpc.errback(exception); + return; + } + if (exception instanceof InvalidAuthnTokenException) { + client.handleInvalidAuthnToken(rpc); + } else if (exception instanceof InvalidAuthzTokenException) { + client.handleInvalidAuthzToken(rpc, exception); + } else if (exception.getStatus().isServiceUnavailable()) { + client.handleRetryableError(rpc, exception); + } else { + // If we don't really know anything about the exception, invalidate the location for the + // tablet, opening the possibility of retrying on a different server. + client.handleTabletNotFound(rpc, exception, connection.getServerInfo()); + } + } + + /** + * @return string representation of the object suitable for printing into logs, etc. + */ + @Override + public String toString() { + return "RpcProxy@" + hashCode() + ", connection=" + connection; + } + + /** + * @return underlying {@link Connection} object representing TCP connection to the server + */ + @InterfaceAudience.LimitedPrivate("Test") + Connection getConnection() { + return connection; + } + + private static String[] rpcTags(final AsyncKuduClient client, + final Connection connection, + final KuduRpc rpc) { + return new String[] { + KuduMetrics.SERVICE_NAME_TAG, rpc.serviceName(), + KuduMetrics.METHOD_NAME_TAG, rpc.method(), + KuduMetrics.SERVER_ID_TAG, connection.getServerInfo().getUuid(), + KuduMetrics.CLIENT_ID_TAG, client.getClientId() + }; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcRemoteException.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcRemoteException.java new file mode 100644 index 0000000000..9a74d3fd89 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcRemoteException.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.rpc.RpcHeader; + +@InterfaceAudience.Private +public class RpcRemoteException extends NonRecoverableException { + private static final long serialVersionUID = 1941395686030839186L; + private final RpcHeader.ErrorStatusPB errPb; + + RpcRemoteException(Status status, RpcHeader.ErrorStatusPB errPb) { + super(status); + this.errPb = errPb; + } + + @InterfaceAudience.Private + public RpcHeader.ErrorStatusPB getErrPB() { + return errPb; + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcTraceFrame.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcTraceFrame.java new file mode 100644 index 0000000000..83dc1f3f78 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/RpcTraceFrame.java @@ -0,0 +1,309 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import com.google.common.base.MoreObjects; +import com.google.common.collect.Table; +import com.google.common.collect.TreeBasedTable; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Container class for traces. Most of its properties can be null, when they aren't set via the + * builder. The timestamp is set automatically. + */ +@InterfaceAudience.Private +class RpcTraceFrame { + enum Action { + // Just before putting the RPC on the wire. + SEND_TO_SERVER { + @Override + void appendToStringBuilder(RpcTraceFrame trace, StringBuilder sb) { + sb.append(String.format("sending RPC to server %s", + trace.getServer().getUuid())); + } + }, + // Just after parsing the response from the server. + RECEIVE_FROM_SERVER { + @Override + void appendToStringBuilder(RpcTraceFrame trace, StringBuilder sb) { + sb.append(String.format("received response from server %s: %s", + trace.getServer().getUuid(), + trace.getStatus())); + } + }, + // Just before sleeping and then retrying. + SLEEP_THEN_RETRY { + @Override + void appendToStringBuilder(RpcTraceFrame trace, StringBuilder sb) { + sb.append(String.format("delaying RPC due to: %s", trace.getStatus())); + } + }, + // Waiting for a new authn token to re-send the request. + GET_NEW_AUTHENTICATION_TOKEN_THEN_RETRY { + @Override + void appendToStringBuilder(RpcTraceFrame trace, StringBuilder sb) { + sb.append("waiting for new authn token"); + } + }, + // After having figured out that we don't know where the RPC is going, + // before querying the master. 
+ QUERY_MASTER { + @Override + void appendToStringBuilder(RpcTraceFrame trace, StringBuilder sb) { + sb.append("refreshing cache from master"); + } + }, + // Once the trace becomes too large, will be the last trace object in the list. + TRACE_TRUNCATED { + @Override + void appendToStringBuilder(RpcTraceFrame trace, StringBuilder sb) { + sb.append(String.format("too many traces: truncated at %d traces", + KuduRpc.MAX_TRACES_SIZE)); + } + }; + + abstract void appendToStringBuilder(RpcTraceFrame trace, StringBuilder sb); + } + + private final String rpcMethod; + private final Action action; + private final ServerInfo serverInfo; + private final long timestampMs; + private final Status callStatus; + + private RpcTraceFrame(String rpcMethod, Action action, + ServerInfo serverInfo, Status callStatus) { + this.rpcMethod = rpcMethod; + this.action = action; + this.serverInfo = serverInfo; + this.callStatus = callStatus; + this.timestampMs = System.currentTimeMillis(); + } + + public String getRpcMethod() { + return rpcMethod; + } + + Action getAction() { + return action; + } + + ServerInfo getServer() { + return serverInfo; + } + + long getTimestampMs() { + return timestampMs; + } + + public Status getStatus() { + return callStatus; + } + + public static String getHumanReadableStringForTraces(List traces) { + String rootMethod; + long baseTimestamp; + if (traces.isEmpty()) { + return "No traces"; + } else { + RpcTraceFrame firstTrace = traces.get(0); + rootMethod = firstTrace.getRpcMethod(); + baseTimestamp = firstTrace.getTimestampMs(); + } + + StringBuilder sb = new StringBuilder("Traces: "); + for (int i = 0; i < traces.size(); i++) { + RpcTraceFrame trace = traces.get(i); + sb.append('['); + sb.append(trace.getTimestampMs() - baseTimestamp); + sb.append("ms] "); + + if (!rootMethod.equals(trace.getRpcMethod())) { + sb.append(String.format("Sub RPC %s: ", trace.getRpcMethod())); + } + + trace.getAction().appendToStringBuilder(trace, sb); + + if (i < traces.size() - 1) { + sb.append(", "); + } + } + return sb.toString(); + } + + /** + * Returns a String wih the trace summary in the following format: + * + * Trace Summary(trace-duration ms): Sent(n), Received(n), Delayed(n), MasterRefresh(n), + * AuthRefresh(n), Truncated: ? + * Sent: (server-uuid, [ rpc-method, count ], ...), ... + * Received: (server-uuid, [ rpc-status, count ], ...), ... + * Delayed: (server-uuid, [ rpc-method, count ], ...), ... + */ + public static String getHumanReadableSummaryStringForTraces(List traces) { + if (traces.isEmpty()) { + return "No traces"; + } + + RpcTraceFrame firstTrace = traces.get(0); + long baseTimestamp = firstTrace.getTimestampMs(); + + // Table with Server UUID as the row, RPC Method as the column, + // and count as the value for each send trace. + Table sentTable = TreeBasedTable.create(); + long sentCount = 0; + // Table with Server UUID as the row, RPC Status as the column, + // and count as the value for each receive trace. + Table receivedTable = TreeBasedTable.create(); + long receivedCount = 0; + // Table with Server UUID as the row, RPC Method as the column, + // and count as the value for each delay trace. + Table delayedTable = TreeBasedTable.create(); + long delayedCount = 0; + long masterRefreshCount = 0; + long authRefreshCount = 0; + boolean truncated = false; + long maxTime = 0; + + for (RpcTraceFrame trace : traces) { + String uuid = trace.getServer() == null ? "UNKNOWN" : trace.getServer().getUuid(); + String method = trace.getRpcMethod() == null ? 
"UNKNOWN" : trace.getRpcMethod(); + String status = trace.getStatus() == null ? "UNKNOWN" : trace.getStatus().getCodeName(); + switch (trace.getAction()) { + case SEND_TO_SERVER: { + long count = sentTable.contains(uuid, method) ? sentTable.get(uuid, method) : 0L; + sentTable.put(uuid, method, count + 1); + sentCount++; + break; + } + case RECEIVE_FROM_SERVER: { + long count = receivedTable.contains(uuid, status) ? receivedTable.get(uuid, status) : 0L; + receivedTable.put(uuid, status, count + 1); + receivedCount++; + break; + } + case SLEEP_THEN_RETRY: { + long count = delayedTable.contains(uuid, method) ? delayedTable.get(uuid, method) : 0L; + delayedTable.put(uuid, method, count + 1); + delayedCount++; + break; + } + case QUERY_MASTER: + masterRefreshCount++; + break; + case GET_NEW_AUTHENTICATION_TOKEN_THEN_RETRY: + authRefreshCount++; + break; + case TRACE_TRUNCATED: + truncated = true; + break; + default: + throw new IllegalArgumentException("Unexpected action: " + trace.getAction()); + } + maxTime = Long.max(maxTime, trace.getTimestampMs() - baseTimestamp); + } + + StringBuilder sb = new StringBuilder(); + sb.append(String.format("Trace Summary(%s ms): Sent(%s), Received(%s), Delayed(%s), " + + "MasterRefresh(%s), AuthRefresh(%s), Truncated: %s", + maxTime, sentCount, receivedCount, delayedCount, masterRefreshCount, authRefreshCount, + truncated)); + if (!sentTable.isEmpty()) { + sb.append(String.format("%n Sent: %s", tableToString(sentTable))); + } + if (!receivedTable.isEmpty()) { + sb.append(String.format("%n Received: %s", tableToString(receivedTable))); + } + if (!delayedTable.isEmpty()) { + sb.append(String.format("%n Delayed: %s", tableToString(delayedTable))); + } + return sb.toString(); + } + + /** + * Returns a string representation of the table in the format of: + * (row, [ column, value ], ...), ..." + */ + private static String tableToString(Table table) { + StringBuilder sb = new StringBuilder(); + List rowKeys = new ArrayList<>(table.rowKeySet()); + for (int i = 0; i < rowKeys.size(); i++) { + String rowKey = rowKeys.get(i); + sb.append("(").append(rowKey).append(", "); + List> columns = new ArrayList<>(table.row(rowKey).entrySet()); + for (int j = 0; j < columns.size(); j++) { + Map.Entry column = columns.get(j); + sb.append(String.format("[ %s, %s ]", column.getKey(), column.getValue())); + if (j < columns.size() - 1) { + sb.append(", "); + } + } + + sb.append(")"); + if (i < rowKeys.size() - 1) { + sb.append(", "); + } + } + return sb.toString(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("rpcMethod", rpcMethod) + .add("timestampMs", timestampMs) + .add("action", action) + .add("serverInfo", serverInfo) + .add("callStatus", callStatus) + .toString(); + } + + /** + * Builder class for trace frames. The only required parameters are set in the constructor. + * Timestamp is set automatically. 
+ */ + static class RpcTraceFrameBuilder { + private final String rpcMethod; + private final Action action; + private ServerInfo serverInfo; + private Status callStatus; + + RpcTraceFrameBuilder(String rpcMethod, Action action) { + this.rpcMethod = rpcMethod; + this.action = action; + } + + public RpcTraceFrameBuilder serverInfo(ServerInfo serverInfo) { + this.serverInfo = serverInfo; + return this; + } + + public RpcTraceFrameBuilder callStatus(Status callStatus) { + this.callStatus = callStatus; + return this; + } + + public RpcTraceFrame build() { + return new RpcTraceFrame(rpcMethod, action, serverInfo, callStatus); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SecurityContext.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SecurityContext.java new file mode 100644 index 0000000000..cc2c8733d6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SecurityContext.java @@ -0,0 +1,501 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
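To make the trace plumbing in RpcTraceFrame.java above concrete, here is a small sketch of building frames by hand and rendering them with the two helpers defined in that class. RpcTraceFrame and its builder are package-private, so this only compiles from within org.apache.kudu.client; the "Write" method label and the wrapper class name are arbitrary, and in real use the frames are attached to a KuduRpc by the client internals rather than built by hand.

[source,java]
----
// Hedged sketch; RpcTraceFrameExample is a hypothetical wrapper class.
package org.apache.kudu.client;

import java.util.Arrays;
import java.util.List;

class RpcTraceFrameExample {
  static void printExampleTraces() {
    List<RpcTraceFrame> traces = Arrays.asList(
        new RpcTraceFrame.RpcTraceFrameBuilder("Write", RpcTraceFrame.Action.QUERY_MASTER)
            .build(),
        new RpcTraceFrame.RpcTraceFrameBuilder("Write", RpcTraceFrame.Action.SLEEP_THEN_RETRY)
            .callStatus(Status.NotFound("tablet location is stale"))
            .build());

    // Per-frame timeline, e.g. "Traces: [0ms] refreshing cache from master, [1ms] ..."
    System.out.println(RpcTraceFrame.getHumanReadableStringForTraces(traces));

    // Aggregated counts per server/method/status plus the total elapsed time.
    System.out.println(RpcTraceFrame.getHumanReadableSummaryStringForTraces(traces));
  }
}
----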
+ +package org.apache.kudu.client; + +import java.io.IOException; +import java.security.AccessControlContext; +import java.security.AccessController; +import java.security.GeneralSecurityException; +import java.security.KeyStore; +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; +import java.security.cert.CertificateException; +import java.security.cert.CertificateFactory; +import java.security.cert.X509Certificate; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import javax.annotation.concurrent.GuardedBy; +import javax.net.ssl.SSLContext; +import javax.net.ssl.SSLEngine; +import javax.net.ssl.TrustManager; +import javax.net.ssl.TrustManagerFactory; +import javax.net.ssl.X509TrustManager; +import javax.security.auth.Subject; +import javax.security.auth.kerberos.KerberosPrincipal; + +import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; +import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.client.Client.AuthenticationCredentialsPB; +import org.apache.kudu.security.Token.JwtRawPB; +import org.apache.kudu.security.Token.SignedTokenPB; +import org.apache.kudu.security.Token.TokenPB; +import org.apache.kudu.util.Pair; +import org.apache.kudu.util.SecurityUtil; + +/** + * Class associated with a single AsyncKuduClient which stores security-related + * infrastructure, credentials, and trusted certificates. + * + * Each client has a single instance of this class. This class is threadsafe. + */ +class SecurityContext { + private static final Logger LOG = LoggerFactory.getLogger(SecurityContext.class); + + private static final long REFRESH_RATE_LIMIT_SECS = 10; + + @GuardedBy("this") + @Nullable + private SignedTokenPB authnToken; + + @GuardedBy("this") + @Nullable + private JwtRawPB jsonWebToken; + + @GuardedBy("this") + private String realUser; + + private final DelegatedTrustManager trustManager = new DelegatedTrustManager(); + + /** + * SSLContext which trusts only the configured certificate. + */ + private final SSLContext sslContextWithCert; + + /** + * SSLContext which trusts any certificate. + */ + private final SSLContext sslContextTrustAny; + + private final Object subjectLock = new Object(); + + /** + * The JAAS Subject that the client's credentials are stored in. + */ + @Nullable + @GuardedBy("subjectLock") + private Subject subject; + + private enum SubjectType { + /** + * The Subject was created when this class was instantiated. + */ + CREATED, + /** + * A Subject with appropriate credentials was provided by the caller who + * instantiated this class. + */ + PROVIDED, + /** + * We have no Subject at all (i.e we could not login on our own, and the + * caller did not provide a Subject with appropriate credentials). + */ + NONE + } + + @Nonnull + private final SubjectType subjectType; + + /** + * The currently trusted CA certs, in DER format. 
+ */ + @InterfaceAudience.LimitedPrivate("Test") + @GuardedBy("this") + List trustedCertDers = Collections.emptyList(); + + @GuardedBy("subjectLock") + private long nextAllowedRefreshNanotime = 0; + + @GuardedBy("subjectLock") + private boolean loggedRefreshFailure = false; + + SecurityContext() { + try { + Pair p = setupSubject(); + this.subjectType = p.getFirst(); + this.subject = p.getSecond(); + + this.realUser = System.getProperty("user.name"); + + this.sslContextWithCert = SSLContext.getInstance("TLS"); + sslContextWithCert.init(null, new TrustManager[] { trustManager }, null); + + this.sslContextTrustAny = SSLContext.getInstance("TLS"); + sslContextTrustAny.init(null, new TrustManager[] { new TrustAnyCert() }, null); + } catch (GeneralSecurityException | RuntimeException e) { + throw new RuntimeException(e); + } + } + + private static Pair setupSubject() { + AccessControlContext context = AccessController.getContext(); + Subject subject = Subject.getSubject(context); + if (subject != null) { + if (!subject.getPrincipals(KerberosPrincipal.class).isEmpty()) { + LOG.debug("Using caller-provided subject with Kerberos principal {}. " + + "Caller is responsible for refreshing credentials.", + SecurityUtil.getKerberosPrincipalOrNull(subject)); + return new Pair<>(SubjectType.PROVIDED, subject); + } + LOG.debug("Caller-provided subject {} does not have any Kerberos credentials. " + + "Ignoring it.", subject.toString()); + } + + subject = SecurityUtil.getSubjectFromTicketCacheOrNull(); + if (subject != null) { + return new Pair<>(SubjectType.CREATED, subject); + } + // If we weren't able to login from a ticket cache when we create the client, + // we shouldn't later pick one up. + return new Pair<>(SubjectType.NONE, null); + } + + /** + * Check if the Subject associated with this SecurityContext needs to be refreshed, + * and if so, do so. If there is no associated subject this is a no-op. + */ + public void refreshSubject() { + if (subjectType == SubjectType.NONE) { + return; + } + synchronized (subjectLock) { + Subject localSubject = subject; + + boolean needed = SecurityUtil.needsRefresh(localSubject); + if (!needed) { + // If we don't need to refresh, but we previously logged a warning + // about a failure to refresh, then someone must have externally + // refreshed the Subject. + if (loggedRefreshFailure) { + LOG.info("Credentials appear to have been refreshed externally, subject={}", subject); + loggedRefreshFailure = false; + } + return; + } + + // Our current credentials are stale and need a refresh. + + if (subjectType == SubjectType.PROVIDED) { + // In the case that the user provided the subject, we don't attempt to + // muck with the tickets inside it. Instead, just log a warning + // if we haven't already. + if (!loggedRefreshFailure) { + LOG.warn("Caller-provided Subject has a Kerberos ticket that is about to expire. " + + "Kudu expects the application to renew or re-acquire its own tickets " + + "before expiration."); + loggedRefreshFailure = true; + } + return; + } + + // Don't attempt to refresh if we recently attempted to and failed. This + // prevents flooding the KDC, etc. + long now = System.nanoTime(); + // If we recently failed to refresh, don't retry. 
+ if (now < nextAllowedRefreshNanotime) { + return; + } + + LOG.debug("Refreshing Kerberos credentials..."); + Subject newSubject; + try { + newSubject = Subject.doAs(new Subject(), + (PrivilegedExceptionAction) SecurityUtil::getSubjectFromTicketCacheOrNull); + } catch (PrivilegedActionException e) { + throw new RuntimeException(e.getCause()); + } + if (newSubject == null || SecurityUtil.getKerberosPrincipalOrNull(newSubject) == null) { + LOG.warn("Tried to refresh Kerberos credentials but was unable to re-login from " + + "ticket cache"); + loggedRefreshFailure = true; + nextAllowedRefreshNanotime = now + TimeUnit.SECONDS.toNanos(REFRESH_RATE_LIMIT_SECS); + return; + } + // It's possible that the ticket cache ended up with a different principal. + // If we accepted this new subject, that would cause us to switch principals + // in the context of a single Kudu client, or potentially have a different + // principal in use on different connections (eg one principal talking to one + // master and another principal to another). This would be very hard to diagnose + // so let's just refuse the re-login attempt if the principal switched. + KerberosPrincipal oldPrincipal = SecurityUtil.getKerberosPrincipalOrNull(localSubject); + KerberosPrincipal principal = SecurityUtil.getKerberosPrincipalOrNull(newSubject); + if (!Objects.equals(oldPrincipal, principal)) { + LOG.error("Attempted to refresh Kerberos credentials from ticket cache but found that " + + "the new Kerberos principal {} did not match the original principal {}. Ignoring.", + principal, oldPrincipal); + loggedRefreshFailure = true; + nextAllowedRefreshNanotime = now + TimeUnit.SECONDS.toNanos(REFRESH_RATE_LIMIT_SECS); + return; + } + + loggedRefreshFailure = false; + this.subject = newSubject; + LOG.info("Successfully refreshed Kerberos credentials from ticket cache"); + } + } + + @Nullable + public Subject getSubject() { + synchronized (subjectLock) { + return subject; + } + } + + public synchronized String getRealUser() { + return realUser; + } + + @Nullable + public synchronized byte[] exportAuthenticationCredentials() { + AuthenticationCredentialsPB.Builder pb = AuthenticationCredentialsPB.newBuilder(); + pb.setRealUser(realUser); + if (authnToken != null) { + pb.setAuthnToken(authnToken); + } + if (jsonWebToken != null) { + pb.setJwt(jsonWebToken); + } + pb.addAllCaCertDers(trustedCertDers); + return pb.build().toByteArray(); + } + + private static String getUserFromToken(SignedTokenPB token) + throws InvalidProtocolBufferException { + TokenPB pb = TokenPB.parseFrom(token.getTokenData()); + return pb.getAuthn().getUsername(); + } + + private static void checkUserMatches(SignedTokenPB oldToken, SignedTokenPB newToken) + throws InvalidProtocolBufferException { + String oldUser = getUserFromToken(oldToken); + String newUser = getUserFromToken(newToken); + + if (!oldUser.equals(newUser)) { + throw new IllegalArgumentException(String.format( + "cannot import authentication data from a different user: old='%s', new='%s'", + oldUser, newUser)); + } + } + + public synchronized void importAuthenticationCredentials(byte[] authnData) { + try { + AuthenticationCredentialsPB pb = AuthenticationCredentialsPB.parseFrom(authnData); + if (pb.hasAuthnToken() && authnToken != null) { + // TODO(todd): also check that, if there is a JAAS subject, that + // the subject in the imported authn token matches the Kerberos + // principal in the JAAS subject. 
Alternatively, this could + // completely disable the JAAS authentication path (assumedly if + // we import a token, we want to _only_ act as the user in that + // token, and would rather have a connection failure than flip + // back to GSSAPI transparently). + checkUserMatches(authnToken, pb.getAuthnToken()); + } + + LOG.debug("Importing authentication credentials with {} authn token, " + + "with {} JWT, {} cert(s), and realUser={}", + pb.hasAuthnToken() ? "one" : "no", + pb.hasJwt() ? "one" : "no", + pb.getCaCertDersCount(), + pb.hasRealUser() ? pb.getRealUser() : ""); + if (pb.hasAuthnToken()) { + authnToken = pb.getAuthnToken(); + } + + // only trust ca certificates automatically if they were acquired with mutual trust of + // identities + if (!pb.hasJwt()) { + trustCertificates(pb.getCaCertDersList()); + } + + if (pb.hasJwt()) { + // Don't overwrite the JWT in the context if it's already set. + if (jsonWebToken == null || !jsonWebToken.hasJwtData() || + (jsonWebToken.hasJwtData() && jsonWebToken.getJwtData().isEmpty())) { + jsonWebToken = pb.getJwt(); + } + } + + if (pb.hasRealUser()) { + realUser = pb.getRealUser(); + } + } catch (InvalidProtocolBufferException | CertificateException e) { + throw new IllegalArgumentException(e); + } + } + + /** + * @return the current authentication token, or null if we have no valid token + */ + @Nullable + public synchronized SignedTokenPB getAuthenticationToken() { + return authnToken; + } + + @Nullable + public synchronized JwtRawPB getJsonWebToken() { + return jsonWebToken; + } + + /** + * Set the token that we will use to authenticate to servers. Replaces any + * prior token. + */ + public synchronized void setAuthenticationToken(SignedTokenPB token) { + authnToken = token; + } + + /** + * Set the JWT that we will use to authenticate to the server. Replaces any + * prior JWT. + */ + public synchronized void setJsonWebToken(JwtRawPB jwt) { + jsonWebToken = jwt; + } + + /** + * Create an SSLEngine which will trust all certificates without verification. + */ + public SSLEngine createSSLEngineTrustAll() { + return sslContextTrustAny.createSSLEngine(); + } + + /** + * Create an SSLEngine which will trust only certificates that have a valid chain + * of trust. + */ + public SSLEngine createSSLEngine() { + return sslContextWithCert.createSSLEngine(); + } + + /** + * @return true if any cert has been marked as trusted + */ + public synchronized boolean hasTrustedCerts() { + return !trustedCertDers.isEmpty(); + } + + /** + * Create a trust manager which will trust all of the given CA certs. + */ + private static X509TrustManager createTrustManagerForCerts(Iterable certDers) + throws CertificateException { + CertificateFactory certFactory = CertificateFactory.getInstance("X.509"); + + List certs = Lists.newArrayList(); + for (ByteString certDer : certDers) { + certs.add((X509Certificate)certFactory.generateCertificate( + certDer.newInput())); + } + + // This is implemented by making a new TrustManager and swapping it out under + // our delegating trust manager. It might seem more straight-forward to instead + // just keep one keystore around and load new certs into it, but apparently the + // TrustManager loads the certs from the KeyStore upon creation, so adding new + // ones to an existing KeyStore doesn't have any effect. 
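The comment above points at a JDK subtlety worth restating: a TrustManager snapshots the KeyStore's certificates when the TrustManagerFactory creates it, so changing the trusted CAs means building a fresh TrustManager and swapping it in (which is what the DelegatedTrustManager below enables). For reference, a standalone version of that recipe using only JDK types, independent of the Kudu classes, might look like the following; the class and method names are placeholders.

[source,java]
----
// Minimal, self-contained restatement of the KeyStore -> TrustManagerFactory ->
// X509TrustManager recipe described above. Names are placeholders, not Kudu code.
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.security.KeyStore;
import java.security.cert.CertificateFactory;
import java.security.cert.X509Certificate;
import java.util.List;
import javax.net.ssl.TrustManager;
import javax.net.ssl.TrustManagerFactory;
import javax.net.ssl.X509TrustManager;

final class TrustManagerRecipe {
  static X509TrustManager fromDerCerts(List<byte[]> derCerts)
      throws GeneralSecurityException, IOException {
    CertificateFactory cf = CertificateFactory.getInstance("X.509");
    KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
    ks.load(null, null);  // start from an empty, in-memory keystore
    int i = 0;
    for (byte[] der : derCerts) {
      X509Certificate cert =
          (X509Certificate) cf.generateCertificate(new ByteArrayInputStream(der));
      ks.setCertificateEntry("ca-" + i++, cert);
    }
    TrustManagerFactory tmf =
        TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
    tmf.init(ks);  // the resulting TrustManagers snapshot the keystore contents here
    for (TrustManager tm : tmf.getTrustManagers()) {
      if (tm instanceof X509TrustManager) {
        return (X509TrustManager) tm;
      }
    }
    throw new GeneralSecurityException("no X509TrustManager available");
  }
}
----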
+ try { + KeyStore certKeyStore = KeyStore.getInstance(KeyStore.getDefaultType()); + certKeyStore.load(null); + int i = 0; + for (X509Certificate cert : certs) { + certKeyStore.setCertificateEntry(String.format("cert-%d", i++), cert); + } + + TrustManagerFactory tmf = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm()); + tmf.init(certKeyStore); + TrustManager[] managers = tmf.getTrustManagers(); + if (managers.length != 1) { + throw new RuntimeException("TrustManagerFactory generated multiple TrustManagers"); + } + return (X509TrustManager) managers[0]; + } catch (GeneralSecurityException | IOException | RuntimeException e) { + Throwables.throwIfInstanceOf(e, CertificateException.class); + throw new RuntimeException(e); + } + } + + /** + * Mark the given CA cert (provided in DER form) as the trusted CA cert for the + * client. Replaces any previously trusted cert. + * @throws CertificateException if the cert was invalid + */ + public void trustCertificates(List certDers) throws CertificateException { + X509TrustManager tm = createTrustManagerForCerts(certDers); + synchronized (this) { + trustManager.delegate.set(tm); + trustedCertDers = ImmutableList.copyOf(certDers); + } + } + + /** + * TrustManager implementation which will trust any certificate. + */ + private static class TrustAnyCert implements X509TrustManager { + @Override + public void checkClientTrusted(X509Certificate[] arg0, String arg1) + throws CertificateException { + } + + @Override + public void checkServerTrusted(X509Certificate[] arg0, String arg1) + throws CertificateException { + } + + @Override + public X509Certificate[] getAcceptedIssuers() { + return new X509Certificate[0]; + } + } + + /** + * Trust manager that delegates to an underlying trust manager which + * can be swapped out atomically. + */ + private static class DelegatedTrustManager implements X509TrustManager { + final AtomicReference delegate = new AtomicReference<>(); + + @Override + public void checkClientTrusted(X509Certificate[] chain, String authType) + throws CertificateException { + delegate.get().checkClientTrusted(chain, authType); + } + + @Override + public void checkServerTrusted(X509Certificate[] chain, String authType) + throws CertificateException { + delegate.get().checkServerTrusted(chain, authType); + } + + @Override + public X509Certificate[] getAcceptedIssuers() { + return delegate.get().getAcceptedIssuers(); + } + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ServerInfo.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ServerInfo.java new file mode 100644 index 0000000000..17e9674d38 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/ServerInfo.java @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.UnknownHostException; +import java.util.Locale; +import java.util.concurrent.ConcurrentHashMap; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.util.NetUtil; + +/** + * Container class for server information that never changes, like UUID and hostname. + */ +@InterfaceAudience.Private +public class ServerInfo { + private final String uuid; + private final HostAndPort hostPort; + private final InetSocketAddress resolvedAddr; + private final String location; + private final boolean local; + private static final ConcurrentHashMap isLocalAddressCache = + new ConcurrentHashMap<>(); + + /** + * Constructor for all the fields. The intent is that there should only be one ServerInfo + * instance per UUID the client is connected to. + * @param uuid server's UUID + * @param hostPort server's hostname and port + * @param resolvedAddr resolved address used to check if the server is local + * @param location the location assigned by the leader master, or an empty string if no location + * is assigned + */ + public ServerInfo(String uuid, HostAndPort hostPort, InetAddress resolvedAddr, String location) { + Preconditions.checkNotNull(uuid); + Preconditions.checkArgument(hostPort.getPort() > 0); + Preconditions.checkNotNull(location); + this.uuid = uuid; + this.hostPort = hostPort; + this.resolvedAddr = new InetSocketAddress(resolvedAddr, hostPort.getPort()); + this.location = location; + this.local = isLocalAddressCache.computeIfAbsent(resolvedAddr, + inetAddress -> NetUtil.isLocalAddress(resolvedAddr)); + } + + /** + * Returns this server's uuid. + * @return a string that contains this server's uuid + */ + public String getUuid() { + return uuid; + } + + /** + * Returns this server's canonical hostname. + * @return a string that contains this server's canonical hostname + */ + public String getAndCanonicalizeHostname() { + try { + return InetAddress.getByName( + hostPort.getHost()).getCanonicalHostName().toLowerCase(Locale.ENGLISH); + } catch (UnknownHostException e) { + return hostPort.getHost(); + } + } + + /** + * Returns this server's hostname and port. + * @return a HostAndPort that describes where this server can be reached. + */ + public HostAndPort getHostAndPort() { + return hostPort; + } + + /** + * Returns this server's port. + * @return a port number that this server is bound to + */ + public int getPort() { + return hostPort.getPort(); + } + + /** + * Returns this server's location. If no location is assigned, returns an empty string. + * @return the server's location + */ + public String getLocation() { + return location; + } + + /** + * Returns true if the server is in the same location as 'location'. + * @return true if the server is in 'location'. + */ + public boolean inSameLocation(String loc) { + Preconditions.checkNotNull(loc); + return !loc.isEmpty() && + loc.equals(location); + } + + /** + * Returns if this server is on this client's host. 
+ * @return true if the server is local, else false + */ + public boolean isLocal() { + return local; + } + + /** + * @return the cached resolved address for this server + */ + public InetSocketAddress getResolvedAddress() { + return resolvedAddr; + } + + @Override + public String toString() { + return uuid + "(" + hostPort + ")"; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SessionConfiguration.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SessionConfiguration.java new file mode 100644 index 0000000000..94bcb29347 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SessionConfiguration.java @@ -0,0 +1,226 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Interface that defines the methods used to configure a session. It also exposes ways to + * query its state. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface SessionConfiguration { + + @InterfaceAudience.Public + @InterfaceStability.Evolving + enum FlushMode { + /** + * Each {@link KuduSession#apply KuduSession.apply()} call will return only after being + * flushed to the server automatically. No batching will occur. + * + *
<p>
In this mode, the {@link KuduSession#flush} call never has any effect, since each + * {@link KuduSession#apply KuduSession.apply()} has already flushed the buffer before + * returning. + * + *
<p>
This is the default flush mode. + */ + AUTO_FLUSH_SYNC, + + /** + * {@link KuduSession#apply KuduSession.apply()} calls will return immediately, but the writes + * will be sent in the background, potentially batched together with other writes from + * the same session. If there is not sufficient buffer space, then + * {@link KuduSession#apply KuduSession.apply()} may block for buffer space to be available. + * + *
<p>
Because writes are applied in the background, any errors will be stored + * in a session-local buffer. Call {@link #countPendingErrors() countPendingErrors()} or + * {@link #getPendingErrors() getPendingErrors()} to retrieve them. + * + *
<p>
Note: The {@code AUTO_FLUSH_BACKGROUND} mode may result in + * out-of-order writes to Kudu. This is because in this mode multiple write + * operations may be sent to the server in parallel. + * See KUDU-1767 for more + * information. + * + *
<p>
The {@link KuduSession#flush()} call can be used to block until the buffer is empty. + */ + AUTO_FLUSH_BACKGROUND, + + /** + * {@link KuduSession#apply KuduSession.apply()} calls will return immediately, but the writes + * will not be sent until the user calls {@link KuduSession#flush()}. If the buffer runs past + * the configured space limit, then {@link KuduSession#apply KuduSession.apply()} will return + * an error. + */ + MANUAL_FLUSH + } + + /** + * Get the current flush mode. + * @return flush mode, {@link FlushMode#AUTO_FLUSH_SYNC AUTO_FLUSH_SYNC} by default + */ + FlushMode getFlushMode(); + + /** + * Set the new flush mode for this session. + * @param flushMode new flush mode, can be the same as the previous one. + * @throws IllegalArgumentException if the buffer isn't empty. + */ + void setFlushMode(FlushMode flushMode); + + /** + * Set the number of operations that can be buffered. + * @param size number of ops. + * @throws IllegalArgumentException if the buffer isn't empty. + */ + default void setMutationBufferSpace(int size) { + setMutationBufferSpace(size, -1); + } + + /** + * Set the number and the maximum byte size of operations that can be buffered. + * @param numOps number of ops. + * @param maxSize max byte size of ops. + * @throws IllegalArgumentException if the buffer isn't empty. + */ + void setMutationBufferSpace(int numOps, long maxSize); + + /** + * Set the low watermark for this session. The default is set to half the mutation buffer space. + * For example, a buffer space of 1000 with a low watermark set to 50% (0.5) will start randomly + * sending PleaseRetryExceptions once there's an outstanding flush and the buffer is over 500. + * As the buffer gets fuller, it becomes likelier to hit the exception. + * @param mutationBufferLowWatermarkPercentage a new low watermark as a percentage, + * has to be between 0 and 1 (inclusive). A value of 1 disables + * the low watermark since it's the same as the high one + * @throws IllegalArgumentException if the buffer isn't empty or if the watermark isn't between + * 0 and 1 + * @deprecated The low watermark no longer has any effect. + */ + @Deprecated + void setMutationBufferLowWatermark(float mutationBufferLowWatermarkPercentage); + + /** + * Set the flush interval, which will be used for the next scheduling decision. + * @param interval interval in milliseconds. + */ + void setFlushInterval(int interval); + + /** + * Get the current timeout. + * @return operation timeout in milliseconds, 0 if none was configured. + */ + long getTimeoutMillis(); + + /** + * Sets the timeout for the next applied operations. + * The default timeout is 0, which disables the timeout functionality. + * @param timeout Timeout in milliseconds. + */ + void setTimeoutMillis(long timeout); + + /** + * Returns true if this session has already been closed. + */ + boolean isClosed(); + + /** + * Check if there are operations that haven't been completely applied. + * @return true if operations are pending, else false. + */ + boolean hasPendingOperations(); + + /** + * Set the new external consistency mode for this session. + * @param consistencyMode new external consistency mode, can the same as the previous one. + * @throws IllegalArgumentException if the buffer isn't empty. + */ + void setExternalConsistencyMode(ExternalConsistencyMode consistencyMode); + + /** + * Tells if the session is currently ignoring row errors when the whole list returned by a tablet + * server is of the AlreadyPresent type. 
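Since SessionConfiguration is the surface most applications interact with, a usage sketch may help tie the flush modes and the error accessors together. The KuduClient/KuduSession/KuduTable plumbing (newSession(), newInsert(), getRow(), addInt()) comes from elsewhere in the client library and is assumed here; the SessionConfiguration calls are the ones documented in this interface, and "key" is a placeholder column name.

[source,java]
----
// Hedged sketch: client, table and row plumbing are assumed from the wider client API,
// and the method assumes the usual org.apache.kudu.client imports.
void writeInBackground(KuduClient client, KuduTable table) throws KuduException {
  KuduSession session = client.newSession();
  session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND);
  session.setMutationBufferSpace(10000);      // cap the number of buffered operations
  session.setIgnoreAllDuplicateRows(true);    // treat retried INSERT duplicates as success

  for (int i = 0; i < 1000; i++) {
    Insert insert = table.newInsert();
    insert.getRow().addInt("key", i);         // "key" is a placeholder column name
    session.apply(insert);                    // returns quickly; flushed in the background
  }
  session.flush();                            // block until the buffer is empty

  // In background mode errors are not thrown from apply(); they accumulate in the session.
  if (session.countPendingErrors() > 0) {
    RowErrorsAndOverflowStatus pending = session.getPendingErrors();
    // Inspect pending (row errors plus an overflow flag) and decide what to retry.
  }
}
----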
+ * @return true if the session is enforcing this, else false + */ + boolean isIgnoreAllDuplicateRows(); + + /** + * Configures the option to ignore all the row errors if they are all of the AlreadyPresent type. + * This can be useful when it is possible for INSERT operations to be retried and fail. + * The effect of enabling this is that operation responses that match this pattern will be + * cleared of their row errors, meaning that we consider them successful. + * + * TODO(KUDU-1563): Implement server side ignore capabilities to improve performance and + * reliability of INSERT ignore operations. + * + *
<p>
Disabled by default. + * @param ignoreAllDuplicateRows true if this session should enforce this, else false + */ + void setIgnoreAllDuplicateRows(boolean ignoreAllDuplicateRows); + + /** + * Tells if the session is currently ignoring row errors when the whole list returned by a tablet + * server is of the NotFound type. + * @return true if the session is enforcing this, else false + */ + boolean isIgnoreAllNotFoundRows(); + + /** + * Configures the option to ignore all the row errors if they are all of the NotFound type. + * This can be useful when it is possible for DELETE operations to be retried and fail. + * The effect of enabling this is that operation responses that match this pattern will be + * cleared of their row errors, meaning that we consider them successful. + * + * TODO(KUDU-1563): Implement server side ignore capabilities to improve performance and + * reliability of DELETE ignore operations. + * + *
<p>
Disabled by default. + * @param ignoreAllNotFoundRows true if this session should enforce this, else false + */ + void setIgnoreAllNotFoundRows(boolean ignoreAllNotFoundRows); + + /** + * Set the number of errors that can be collected. + * @param size number of errors. + */ + void setErrorCollectorSpace(int size); + + /** + * Return the number of errors which are pending. Errors may accumulate when + * using {@link FlushMode#AUTO_FLUSH_BACKGROUND AUTO_FLUSH_BACKGROUND} mode. + * @return a count of errors + */ + int countPendingErrors(); + + /** + * Return any errors from previous calls. If there were more errors + * than could be held in the session's error storage, the overflow state is set to true. + * + *
<p>
Clears the pending errors. + * @return an object that contains the errors and the overflow status + */ + RowErrorsAndOverflowStatus getPendingErrors(); + + /** + * Return cumulative write operation metrics since the beginning of the session. + * @return cumulative write operation metrics since the beginning of the session. + */ + ResourceMetrics getWriteOpMetrics(); +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SplitKeyRangeRequest.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SplitKeyRangeRequest.java new file mode 100644 index 0000000000..b1677360f8 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SplitKeyRangeRequest.java @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.List; + +import com.google.protobuf.Message; +import com.google.protobuf.UnsafeByteOperations; +import io.netty.util.Timer; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.Common.KeyRangePB; +import org.apache.kudu.security.Token; +import org.apache.kudu.tserver.Tserver; +import org.apache.kudu.util.Pair; + +/* + * RPC to split a tablet's primary key range into smaller ranges suitable for concurrent scanning. + */ +@InterfaceAudience.Private +class SplitKeyRangeRequest extends KuduRpc { + + private final byte[] startPrimaryKey; + private final byte[] endPrimaryKey; + private final byte[] partitionKey; + private final long splitSizeBytes; + + /** The token with which to authorize this RPC. */ + private Token.SignedTokenPB authzToken; + + /** + * Create a new RPC request + * @param table table to lookup + * @param startPrimaryKey the primary key to begin splitting at (inclusive), pass null to + * start at the beginning + * @param endPrimaryKey the primary key to stop splitting at (exclusive), pass null to + * get all the key ranges + * @param partitionKey the partition key of the tablet to find + * @param splitSizeBytes the size of the data in each key range. + * This is a hint: The tablet server may return the size of key range + * larger or smaller than this value. + * @param timer Timer to monitor RPC timeouts. 
+ * @param timeoutMillis the timeout of the request in milliseconds + */ + SplitKeyRangeRequest(KuduTable table, + byte[] startPrimaryKey, + byte[] endPrimaryKey, + byte[] partitionKey, + long splitSizeBytes, + Timer timer, + long timeoutMillis) { + super(table, timer, timeoutMillis); + this.startPrimaryKey = startPrimaryKey; + this.endPrimaryKey = endPrimaryKey; + this.partitionKey = partitionKey; + this.splitSizeBytes = splitSizeBytes; + } + + @Override + Message createRequestPB() { + RemoteTablet tablet = super.getTablet(); + final Tserver.SplitKeyRangeRequestPB.Builder builder = + Tserver.SplitKeyRangeRequestPB.newBuilder(); + builder.setTabletId(UnsafeByteOperations.unsafeWrap(tablet.getTabletIdAsBytes())); + if (this.startPrimaryKey != null && this.startPrimaryKey.length > 0) { + builder.setStartPrimaryKey(UnsafeByteOperations.unsafeWrap(startPrimaryKey)); + } + if (this.endPrimaryKey != null && this.endPrimaryKey.length > 0) { + builder.setStopPrimaryKey(UnsafeByteOperations.unsafeWrap(endPrimaryKey)); + } + builder.setTargetChunkSizeBytes(splitSizeBytes); + if (authzToken != null) { + builder.setAuthzToken(authzToken); + } + + return builder.build(); + } + + @Override + boolean needsAuthzToken() { + return true; + } + + @Override + void bindAuthzToken(Token.SignedTokenPB token) { + authzToken = token; + } + + @Override + String serviceName() { + return TABLET_SERVER_SERVICE_NAME; + } + + @Override + String method() { + return "SplitKeyRange"; + } + + @Override + Pair deserialize(CallResponse callResponse, String tsUuid) { + final Tserver.SplitKeyRangeResponsePB.Builder respBuilder = + Tserver.SplitKeyRangeResponsePB.newBuilder(); + readProtobuf(callResponse.getPBMessage(), respBuilder); + + List keyRanges = new ArrayList<>(respBuilder.getRangesList()); + + SplitKeyRangeResponse response = new SplitKeyRangeResponse( + timeoutTracker.getElapsedMillis(), tsUuid, keyRanges); + return new Pair<>(response, respBuilder.hasError() ? respBuilder.getError() : null); + } + + @Override + byte[] partitionKey() { + return this.partitionKey; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SplitKeyRangeResponse.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SplitKeyRangeResponse.java new file mode 100644 index 0000000000..2b48218ee3 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/SplitKeyRangeResponse.java @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.List; + +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.Common.KeyRangePB; + +/** + * Response type for SplitKeyRangeRequest. 
+ */ +@InterfaceAudience.Private +public class SplitKeyRangeResponse extends KuduRpcResponse { + + private List keyRanges; + + SplitKeyRangeResponse(long elapsedMillis, String tsUUID, List keyRanges) { + super(elapsedMillis, tsUUID); + this.keyRanges = keyRanges; + } + + /** + * Get the list of primary key ranges key as specified in the request. + * @return a list of key ranges + */ + public List getKeyRanges() { + return keyRanges; + } +} + diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Statistics.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Statistics.java new file mode 100644 index 0000000000..c4b0c0fc52 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Statistics.java @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLongArray; + +import com.google.common.collect.Sets; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * A Statistics belongs to a specific AsyncKuduClient. It stores client-level + * statistics including number of operations, number of bytes written, number of + * rpcs. It is created along with the client's creation, and can be obtained through + * AsyncKuduClient or KuduClient's getStatistics method. Once obtained, an instance + * of this class can be used directly. + *
<p>
+ * This class is thread-safe. The user can use it anywhere to get statistics of this + * client. + *
<p>
+ * The method {@link #toString} can be useful to get a dump of all the metrics aggregated + * for all the tablets. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Statistics { + private final ConcurrentHashMap stsMap = + new ConcurrentHashMap<>(); + + /** + * The statistic enum to pass when querying. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public enum Statistic { + /** + * How many bytes have been written by this client. If one rpc fails, this + * statistic won't be updated. + */ + BYTES_WRITTEN(0), + /** + * How many operations have been sent to server and succeeded. + */ + WRITE_OPS(1), + /** + * How many rpcs have been sent to server and succeeded. One rpc may contain + * multiple operations. + */ + WRITE_RPCS(2), + /** + * How many operations have been sent to server but failed. + */ + OPS_ERRORS(3), + /** + * How many rpcs have been sent to server but failed. + */ + RPC_ERRORS(4); + + Statistic(int idx) { + this.idx = idx; + } + + /** + * Get index of this statistic. + * @return index + */ + int getIndex() { + return this.idx; + } + + private final int idx; + } + + /** + * Get the statistic count of this tablet. + * If the specified tablet doesn't have statistics, 0 will be returned. + * @param tabletId the tablet's id + * @param statistic the statistic type to get + * @return the value of the statistic + */ + public long getTabletStatistic(String tabletId, Statistic statistic) { + TabletStatistics tabletStatistics = stsMap.get(tabletId); + if (tabletStatistics == null) { + return 0; + } else { + return tabletStatistics.getStatistic(statistic); + } + } + + /** + * Get the statistic count of this table. + * @param tableName the table's name + * @param statistic the statistic type to get + * @return the value of the statistic + */ + public long getTableStatistic(String tableName, Statistic statistic) { + long stsResult = 0; + for (TabletStatistics tabletStatistics : stsMap.values()) { + if (!tabletStatistics.tableName.equals(tableName)) { + continue; + } + stsResult += tabletStatistics.getStatistic(statistic); + } + return stsResult; + } + + /** + * Get the statistic count of the whole client. + * @param statistic the statistic type to get + * @return the value of the statistic + */ + public long getClientStatistic(Statistic statistic) { + long stsResult = 0; + for (TabletStatistics tabletStatistics : stsMap.values()) { + stsResult += tabletStatistics.getStatistic(statistic); + } + return stsResult; + } + + /** + * Get the set of tablets which have been written into by this client, + * which have statistics information. + * @return set of tablet ids + */ + public Set getTabletSet() { + Set tablets = Sets.newHashSet(); + // This cast forces the compiler to invoke Map.keySet() rather than + // ConcurrentHashMap's override, which is critical because when this code + // is built with JDK8, ConcurrentHashMap.keySet() introduces a dependency + // on a Java 8 only API. + // + // Note: an alternative would be to always access stsMap as a Map, but that + // just moves the problem to the putIfAbsent() call in getTabletStatistics(), + // which is only a Map method in Java 8. + // + // See KUDU-2188 for details. + for (String tablet : stsMap.keySet()) { + tablets.add(tablet); + } + return tablets; + } + + /** + * Get the set of tables which have been written into by this client, + * which have statistics information. 
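The counters above are easiest to understand from the caller's side. Below is a short sketch using only the getters defined in this class; the Statistics instance itself comes from the client's getStatistics() method, as the class comment notes, and "my_table" is a placeholder table name.

[source,java]
----
// Sketch: assumes the usual org.apache.kudu.client imports; "my_table" is a placeholder.
void dumpWriteStats(KuduClient client) {
  Statistics stats = client.getStatistics();

  long bytesWritten = stats.getClientStatistic(Statistics.Statistic.BYTES_WRITTEN);
  long rpcErrors = stats.getClientStatistic(Statistics.Statistic.RPC_ERRORS);
  long tableWriteOps = stats.getTableStatistic("my_table", Statistics.Statistic.WRITE_OPS);
  System.out.printf("client: %d bytes written, %d RPC errors, %d ops to my_table%n",
      bytesWritten, rpcErrors, tableWriteOps);

  // Per-tablet breakdown for every tablet this client has written to.
  for (String tabletId : stats.getTabletSet()) {
    System.out.printf("%s (table %s): %d write RPCs%n",
        tabletId,
        stats.getTableName(tabletId),
        stats.getTabletStatistic(tabletId, Statistics.Statistic.WRITE_RPCS));
  }
}
----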
+ * @return set of table names + */ + public Set getTableSet() { + Set tables = Sets.newHashSet(); + for (TabletStatistics tabletStat : stsMap.values()) { + tables.add(tabletStat.tableName); + } + return tables; + } + + /** + * Get table name of the given tablet id. + * If the tablet has no statistics, null will be returned. + * @param tabletId the tablet's id + * @return table name + */ + public String getTableName(String tabletId) { + TabletStatistics tabletStatistics = stsMap.get(tabletId); + if (tabletStatistics == null) { + return null; + } else { + return tabletStatistics.tableName; + } + } + + /** + * Get the TabletStatistics object for this specified tablet. + * @param tableName the table's name + * @param tabletId the tablet's id + * @return a TabletStatistics object + */ + Statistics.TabletStatistics getTabletStatistics(String tableName, String tabletId) { + Statistics.TabletStatistics tabletStats = stsMap.get(tabletId); + if (tabletStats == null) { + Statistics.TabletStatistics newTabletStats = new Statistics.TabletStatistics(tableName, + tabletId); + tabletStats = stsMap.putIfAbsent(tabletId, newTabletStats); + if (tabletStats == null) { + tabletStats = newTabletStats; + } + } + return tabletStats; + } + + @Override + public String toString() { + final StringBuilder buf = new StringBuilder(); + buf.append("Current client statistics: "); + buf.append("bytes written:"); + buf.append(getClientStatistic(Statistic.BYTES_WRITTEN)); + buf.append(", write rpcs:"); + buf.append(getClientStatistic(Statistic.WRITE_RPCS)); + buf.append(", rpc errors:"); + buf.append(getClientStatistic(Statistic.RPC_ERRORS)); + buf.append(", write operations:"); + buf.append(getClientStatistic(Statistic.WRITE_OPS)); + buf.append(", operation errors:"); + buf.append(getClientStatistic(Statistic.OPS_ERRORS)); + return buf.toString(); + } + + static class TabletStatistics { + private final AtomicLongArray statistics; + private final String tableName; + private final String tabletId; + + TabletStatistics(String tableName, String tabletId) { + this.tableName = tableName; + this.tabletId = tabletId; + this.statistics = new AtomicLongArray(Statistic.values().length); + } + + void incrementStatistic(Statistic statistic, long count) { + this.statistics.addAndGet(statistic.getIndex(), count); + } + + long getStatistic(Statistic statistic) { + return this.statistics.get(statistic.getIndex()); + } + + @Override + public String toString() { + final StringBuilder buf = new StringBuilder(); + buf.append("Table: "); + buf.append(tableName); + buf.append(", tablet:"); + buf.append(tabletId); + buf.append(", bytes written:"); + buf.append(getStatistic(Statistic.BYTES_WRITTEN)); + buf.append(", write rpcs:"); + buf.append(getStatistic(Statistic.WRITE_RPCS)); + buf.append(", rpc errors:"); + buf.append(getStatistic(Statistic.RPC_ERRORS)); + buf.append(", write operations:"); + buf.append(getStatistic(Statistic.WRITE_OPS)); + buf.append(", operation errors:"); + buf.append(getStatistic(Statistic.OPS_ERRORS)); + return buf.toString(); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Status.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Status.java new file mode 100644 index 0000000000..6fa227683b --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Status.java @@ -0,0 +1,425 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.WireProtocol; +import org.apache.kudu.master.Master; +import org.apache.kudu.transactions.TxnManager; +import org.apache.kudu.tserver.Tserver; + +/** + * Representation of an error code and message. + * See also {@code src/kudu/util/status.h} in the C++ codebase. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Status { + + // Limit the message size we get from the servers as it can be quite large. + @InterfaceAudience.LimitedPrivate("Test") + static final int MAX_MESSAGE_LENGTH = 32 * 1024; + @InterfaceAudience.LimitedPrivate("Test") + static final String ABBREVIATION_CHARS = "..."; + @InterfaceAudience.LimitedPrivate("Test") + static final int ABBREVIATION_CHARS_LENGTH = ABBREVIATION_CHARS.length(); + + // Keep a single OK status object else we'll end up instantiating tons of them. + private static final Status STATIC_OK = new Status(WireProtocol.AppStatusPB.ErrorCode.OK); + + private final WireProtocol.AppStatusPB.ErrorCode code; + private final String message; + private final int posixCode; + + private Status(WireProtocol.AppStatusPB.ErrorCode code, String msg, int posixCode) { + this.code = code; + this.posixCode = posixCode; + + if (msg.length() > MAX_MESSAGE_LENGTH) { + // Truncate the message and indicate that it was abbreviated. + this.message = msg.substring(0, MAX_MESSAGE_LENGTH - ABBREVIATION_CHARS_LENGTH) + + ABBREVIATION_CHARS; + } else { + this.message = msg; + } + } + + private Status(WireProtocol.AppStatusPB appStatusPB) { + this(appStatusPB.getCode(), appStatusPB.getMessage(), appStatusPB.getPosixCode()); + } + + private Status(WireProtocol.AppStatusPB.ErrorCode code, String msg) { + this(code, msg, -1); + } + + private Status(WireProtocol.AppStatusPB.ErrorCode code) { + this(code, "", -1); + } + + // Factory methods. + + /** + * Create a status object from a master error. + * @param masterErrorPB pb object received via RPC from the master + * @return status object equivalent to the pb + */ + static Status fromMasterErrorPB(Master.MasterErrorPB masterErrorPB) { + assert masterErrorPB.hasStatus() : "no status in PB " + masterErrorPB; + return new Status(masterErrorPB.getStatus()); + } + + /** + * Create a status object from a tablet server error. + * @param tserverErrorPB pb object received via RPC from the TS + * @return status object equivalent to the pb + */ + static Status fromTabletServerErrorPB(Tserver.TabletServerErrorPB tserverErrorPB) { + assert tserverErrorPB.hasStatus() : "no status in PB " + tserverErrorPB; + return new Status(tserverErrorPB.getStatus()); + } + + /** + * Create a status object from a TxnManager's error. 
+ * @param pbError protobuf object received via RPC from the TxnManager + * @return status object equivalent to the protobuf + */ + static Status fromTxnManagerErrorPB(TxnManager.TxnManagerErrorPB pbError) { + assert pbError.hasStatus() : "no status in PB " + pbError; + return new Status(pbError.getStatus()); + } + + /** + * Create a Status object from a {@link WireProtocol.AppStatusPB} protobuf object. + * Package-private because we shade Protobuf and this is not usable outside this package. + */ + static Status fromPB(WireProtocol.AppStatusPB pb) { + return new Status(pb); + } + // CHECKSTYLE:OFF + public static Status OK() { + return STATIC_OK; + } + + public static Status NotFound(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.NOT_FOUND, msg); + } + public static Status NotFound(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.NOT_FOUND, msg, posixCode); + } + + public static Status Corruption(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.CORRUPTION, msg); + } + public static Status Corruption(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.CORRUPTION, msg, posixCode); + } + + public static Status NotSupported(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.NOT_SUPPORTED, msg); + } + public static Status NotSupported(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.NOT_SUPPORTED, msg, posixCode); + } + + public static Status InvalidArgument(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.INVALID_ARGUMENT, msg); + } + public static Status InvalidArgument(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.INVALID_ARGUMENT, msg, posixCode); + } + + public static Status IOError(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.IO_ERROR, msg); + } + public static Status IOError(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.IO_ERROR, msg, posixCode); + } + + public static Status AlreadyPresent(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.ALREADY_PRESENT, msg); + } + public static Status AlreadyPresent(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.ALREADY_PRESENT, msg, posixCode); + } + + public static Status RuntimeError(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.RUNTIME_ERROR, msg); + } + public static Status RuntimeError(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.RUNTIME_ERROR, msg, posixCode); + } + + public static Status NetworkError(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.NETWORK_ERROR, msg); + } + public static Status NetworkError(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.NETWORK_ERROR, msg, posixCode); + } + + public static Status IllegalState(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.ILLEGAL_STATE, msg); + } + public static Status IllegalState(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.ILLEGAL_STATE, msg, posixCode); + } + + public static Status NotAuthorized(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.NOT_AUTHORIZED, msg); + } + public static Status NotAuthorized(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.NOT_AUTHORIZED, msg, posixCode); + } + + public static Status 
Aborted(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.ABORTED, msg); + } + public static Status Aborted(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.ABORTED, msg, posixCode); + } + + public static Status RemoteError(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.REMOTE_ERROR, msg); + } + public static Status RemoteError(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.REMOTE_ERROR, msg, posixCode); + } + + public static Status ServiceUnavailable(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE, msg); + } + public static Status ServiceUnavailable(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE, msg, posixCode); + } + + public static Status TimedOut(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.TIMED_OUT, msg); + } + public static Status TimedOut(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.TIMED_OUT, msg, posixCode); + } + + public static Status Uninitialized(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.UNINITIALIZED, msg); + } + public static Status Uninitialized(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.UNINITIALIZED, msg, posixCode); + } + + public static Status ConfigurationError(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.CONFIGURATION_ERROR, msg); + } + public static Status ConfigurationError(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.CONFIGURATION_ERROR, msg, posixCode); + } + + public static Status Incomplete(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.INCOMPLETE, msg); + } + public static Status Incomplete(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.INCOMPLETE, msg, posixCode); + } + + public static Status EndOfFile(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.END_OF_FILE, msg); + } + public static Status EndOfFile(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.END_OF_FILE, msg, posixCode); + } + + public static Status Immutable(String msg) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.IMMUTABLE, msg); + } + public static Status Immutable(String msg, int posixCode) { + return new Status(WireProtocol.AppStatusPB.ErrorCode.IMMUTABLE, msg, posixCode); + } + // CHECKSTYLE:ON + // Boolean status checks. 
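A minimal sketch of how these factory methods pair with the boolean checks that follow; the message text is illustrative:

    import org.apache.kudu.client.Status;

    public class StatusSketch {
      public static void main(String[] args) {
        Status s = Status.NotFound("row was not found");  // illustrative message
        if (!s.ok()) {
          // toString() prefixes the human-readable code name,
          // e.g. "Not found: row was not found".
          System.err.println(s);
        }
        System.out.println(s.isNotFound());    // true
        System.out.println(s.getPosixCode());  // -1, since no posix code was supplied
      }
    }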
+ + public boolean ok() { + return code == WireProtocol.AppStatusPB.ErrorCode.OK; + } + + public boolean isCorruption() { + return code == WireProtocol.AppStatusPB.ErrorCode.CORRUPTION; + } + + public boolean isNotFound() { + return code == WireProtocol.AppStatusPB.ErrorCode.NOT_FOUND; + } + + public boolean isNotSupported() { + return code == WireProtocol.AppStatusPB.ErrorCode.NOT_SUPPORTED; + } + + public boolean isInvalidArgument() { + return code == WireProtocol.AppStatusPB.ErrorCode.INVALID_ARGUMENT; + } + + public boolean isIOError() { + return code == WireProtocol.AppStatusPB.ErrorCode.IO_ERROR; + } + + public boolean isAlreadyPresent() { + return code == WireProtocol.AppStatusPB.ErrorCode.ALREADY_PRESENT; + } + + public boolean isRuntimeError() { + return code == WireProtocol.AppStatusPB.ErrorCode.RUNTIME_ERROR; + } + + public boolean isNetworkError() { + return code == WireProtocol.AppStatusPB.ErrorCode.NETWORK_ERROR; + } + + public boolean isIllegalState() { + return code == WireProtocol.AppStatusPB.ErrorCode.ILLEGAL_STATE; + } + + public boolean isNotAuthorized() { + return code == WireProtocol.AppStatusPB.ErrorCode.NOT_AUTHORIZED; + } + + public boolean isAborted() { + return code == WireProtocol.AppStatusPB.ErrorCode.ABORTED; + } + + public boolean isRemoteError() { + return code == WireProtocol.AppStatusPB.ErrorCode.REMOTE_ERROR; + } + + public boolean isServiceUnavailable() { + return code == WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE; + } + + public boolean isTimedOut() { + return code == WireProtocol.AppStatusPB.ErrorCode.TIMED_OUT; + } + + public boolean isUninitialized() { + return code == WireProtocol.AppStatusPB.ErrorCode.UNINITIALIZED; + } + + public boolean isConfigurationError() { + return code == WireProtocol.AppStatusPB.ErrorCode.CONFIGURATION_ERROR; + } + + public boolean isIncomplete() { + return code == WireProtocol.AppStatusPB.ErrorCode.INCOMPLETE; + } + + public boolean isEndOfFile() { + return code == WireProtocol.AppStatusPB.ErrorCode.END_OF_FILE; + } + + public boolean isImmutable() { + return code == WireProtocol.AppStatusPB.ErrorCode.IMMUTABLE; + } + + /** + * Return a human-readable version of the status code. + * See also status.cc in the C++ codebase. 
+ */ + private String getCodeAsString() { + switch (code.getNumber()) { + case WireProtocol.AppStatusPB.ErrorCode.OK_VALUE: + return "OK"; + case WireProtocol.AppStatusPB.ErrorCode.NOT_FOUND_VALUE: + return "Not found"; + case WireProtocol.AppStatusPB.ErrorCode.CORRUPTION_VALUE: + return "Corruption"; + case WireProtocol.AppStatusPB.ErrorCode.NOT_SUPPORTED_VALUE: + return "Not implemented"; + case WireProtocol.AppStatusPB.ErrorCode.INVALID_ARGUMENT_VALUE: + return "Invalid argument"; + case WireProtocol.AppStatusPB.ErrorCode.IO_ERROR_VALUE: + return "IO error"; + case WireProtocol.AppStatusPB.ErrorCode.ALREADY_PRESENT_VALUE: + return "Already present"; + case WireProtocol.AppStatusPB.ErrorCode.RUNTIME_ERROR_VALUE: + return "Runtime error"; + case WireProtocol.AppStatusPB.ErrorCode.NETWORK_ERROR_VALUE: + return "Network error"; + case WireProtocol.AppStatusPB.ErrorCode.ILLEGAL_STATE_VALUE: + return "Illegal state"; + case WireProtocol.AppStatusPB.ErrorCode.NOT_AUTHORIZED_VALUE: + return "Not authorized"; + case WireProtocol.AppStatusPB.ErrorCode.ABORTED_VALUE: + return "Aborted"; + case WireProtocol.AppStatusPB.ErrorCode.REMOTE_ERROR_VALUE: + return "Remote error"; + case WireProtocol.AppStatusPB.ErrorCode.SERVICE_UNAVAILABLE_VALUE: + return "Service unavailable"; + case WireProtocol.AppStatusPB.ErrorCode.TIMED_OUT_VALUE: + return "Timed out"; + case WireProtocol.AppStatusPB.ErrorCode.UNINITIALIZED_VALUE: + return "Uninitialized"; + case WireProtocol.AppStatusPB.ErrorCode.CONFIGURATION_ERROR_VALUE: + return "Configuration error"; + case WireProtocol.AppStatusPB.ErrorCode.INCOMPLETE_VALUE: + return "Incomplete"; + case WireProtocol.AppStatusPB.ErrorCode.END_OF_FILE_VALUE: + return "End of file"; + case WireProtocol.AppStatusPB.ErrorCode.IMMUTABLE_VALUE: + return "Immutable"; + default: + return "Unknown error (" + code.getNumber() + ")"; + } + } + + /** + * Get the posix code associated with the error. + * @return {@code -1} if no posix code is set. Otherwise, returns the posix code. + */ + public int getPosixCode() { + return posixCode; + } + + /** + * Get enum code name. + * Intended for internal use only. + */ + String getCodeName() { + return code.name(); + } + + /** + * Returns string error message. + * Intended for internal use only. + */ + String getMessage() { + return message; + } + + /** + * Get a human-readable version of the Status message fit for logging or display. + */ + @Override + public String toString() { + String str = getCodeAsString(); + if (code == WireProtocol.AppStatusPB.ErrorCode.OK) { + return str; + } + str = String.format("%s: %s", str, message); + if (posixCode != -1) { + str = String.format("%s (error %d)", str, posixCode); + } + return str; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/TableLocationsCache.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/TableLocationsCache.java new file mode 100644 index 0000000000..788c1f997e --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/TableLocationsCache.java @@ -0,0 +1,324 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import javax.annotation.concurrent.GuardedBy; +import javax.annotation.concurrent.ThreadSafe; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; +import com.google.common.base.Ticker; +import com.google.common.primitives.UnsignedBytes; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A cache of the tablet locations in a table, keyed by partition key. Entries + * in the cache are either tablets or non-covered ranges. + */ +@ThreadSafe +@InterfaceAudience.Private +class TableLocationsCache { + private static final Logger LOG = LoggerFactory.getLogger(TableLocationsCache.class); + private static final Comparator COMPARATOR = UnsignedBytes.lexicographicalComparator(); + + private final ReentrantReadWriteLock rwl = new ReentrantReadWriteLock(); + + @GuardedBy("rwl") + private final NavigableMap entries = new TreeMap<>(COMPARATOR); + + @InterfaceAudience.LimitedPrivate("Test") + static Ticker ticker = Ticker.systemTicker(); + + public Entry get(byte[] partitionKey) { + + if (partitionKey == null) { + // Master lookup. + rwl.readLock().lock(); + try { + Preconditions.checkState(entries.size() <= 1); + TableLocationsCache.Entry entry = entries.get(AsyncKuduClient.EMPTY_ARRAY); + if (entry.isStale()) { + return null; + } + return entry; + } finally { + rwl.readLock().unlock(); + } + + } + + Map.Entry entry; + rwl.readLock().lock(); + try { + entry = entries.floorEntry(partitionKey); + } finally { + rwl.readLock().unlock(); + } + + if (entry == null || + (entry.getValue().getUpperBoundPartitionKey().length > 0 && + Bytes.memcmp(partitionKey, entry.getValue().getUpperBoundPartitionKey()) >= 0) || + entry.getValue().isStale()) { + return null; + } + return entry.getValue(); + } + + /** + * Add tablet locations to the cache. + * + * Already known tablet locations will have their entry updated and deadline extended. + * + * @param tablets the discovered tablets to cache + * @param requestPartitionKey the lookup partition key + * @param requestedBatchSize the number of tablet locations requested from the master in the + * original request + * @param ttl the time in milliseconds that the tablets may be cached for + */ + public void cacheTabletLocations(List tablets, + byte[] requestPartitionKey, + int requestedBatchSize, + long ttl) { + long deadline = ticker.read() + ttl * TimeUnit.MILLISECONDS.toNanos(1); + if (requestPartitionKey == null) { + // Master lookup. 
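    // Note (sketch of the invariant relied on here): a null requestPartitionKey means the
    // caller was resolving the master "tablet" rather than a user table, so the cache
    // collapses to a single entry keyed by the empty partition key
    // (AsyncKuduClient.EMPTY_ARRAY). The size() <= 1 precondition in get() above depends
    // on the clear()-then-put sequence below always maintaining that invariant.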
+ Preconditions.checkArgument(tablets.size() == 1); + Entry entry = Entry.tablet(tablets.get(0), deadline); + + rwl.writeLock().lock(); + try { + entries.clear(); + entries.put(AsyncKuduClient.EMPTY_ARRAY, entry); + } finally { + rwl.writeLock().unlock(); + } + return; + } + + List newEntries = new ArrayList<>(); + + if (tablets.isEmpty()) { + // If there are no tablets in the response, then the table is empty. If + // there were any tablets in the table they would have been returned, since + // the master guarantees that if the partition key falls in a non-covered + // range, the previous tablet will be returned, and we did not set an upper + // bound partition key on the request. + newEntries.add(Entry.nonCoveredRange(AsyncKuduClient.EMPTY_ARRAY, + AsyncKuduClient.EMPTY_ARRAY, + deadline)); + } else { + // The comments below will reference the following diagram: + // + // +---+ +---+---+ + // | | | | | + // A | B | C | D | E | F + // | | | | | + // +---+ +---+---+ + // + // It depicts a tablet locations response from the master containing three + // tablets: B, D and E. Three non-covered ranges are present: A, C, and F. + // An RPC response containing B, D and E could occur if the lookup partition + // key falls in A, B, or C, although the existence of A as an initial + // non-covered range can only be inferred if the lookup partition key falls + // in A. + + final byte[] firstLowerBound = tablets.get(0).getPartition().getPartitionKeyStart(); + + if (Bytes.memcmp(requestPartitionKey, firstLowerBound) < 0) { + // If the first tablet is past the requested partition key, then the + // partition key falls in an initial non-covered range, such as A. + newEntries.add( + Entry.nonCoveredRange(AsyncKuduClient.EMPTY_ARRAY, firstLowerBound, deadline)); + } + + // lastUpperBound tracks the upper bound of the previously processed + // entry, so that we can determine when we have found a non-covered range. + byte[] lastUpperBound = firstLowerBound; + + for (RemoteTablet tablet : tablets) { + final byte[] tabletLowerBound = tablet.getPartition().getPartitionKeyStart(); + final byte[] tabletUpperBound = tablet.getPartition().getPartitionKeyEnd(); + + if (Bytes.memcmp(lastUpperBound, tabletLowerBound) < 0) { + // There is a non-covered range between the previous tablet and this tablet. + // This will discover C while processing the tablet location for D. + newEntries.add(Entry.nonCoveredRange(lastUpperBound, tabletLowerBound, deadline)); + } + lastUpperBound = tabletUpperBound; + + // Now add the tablet itself (such as B, D, or E). + newEntries.add(Entry.tablet(tablet, deadline)); + } + + if (lastUpperBound.length > 0 && + tablets.size() < requestedBatchSize) { + // There is a non-covered range between the last tablet and the end of the + // partition key space, such as F. + newEntries.add( + Entry.nonCoveredRange(lastUpperBound, AsyncKuduClient.EMPTY_ARRAY, deadline)); + } + } + + byte[] discoveredlowerBound = newEntries.get(0).getLowerBoundPartitionKey(); + byte[] discoveredUpperBound = newEntries.get(newEntries.size() - 1) + .getUpperBoundPartitionKey(); + + LOG.debug("Discovered table locations:\t{}", newEntries); + + rwl.writeLock().lock(); + try { + // Remove all existing overlapping entries, and add the new entries. 
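    // Note: the floorEntry() check below widens the eviction range to the left. If an
    // existing entry starts before the first discovered lower bound but still overlaps
    // the looked-up partition key, its start key becomes the lower bound of the range
    // cleared via tailMap()/headMap() before the freshly discovered entries are put back.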
+ Map.Entry floorEntry = entries.floorEntry(discoveredlowerBound); + if (floorEntry != null && + Bytes.memcmp(requestPartitionKey, + floorEntry.getValue().getUpperBoundPartitionKey()) < 0) { + discoveredlowerBound = floorEntry.getKey(); + } + + NavigableMap overlappingEntries = entries.tailMap(discoveredlowerBound, true); + if (discoveredUpperBound.length > 0) { + overlappingEntries = overlappingEntries.headMap(discoveredUpperBound, false); + } + if (LOG.isTraceEnabled()) { + LOG.trace("Existing table locations:\t\t{}", entries.values()); + LOG.trace("Removing table locations:\t\t{}", overlappingEntries.values()); + } + overlappingEntries.clear(); + + for (Entry entry : newEntries) { + entries.put(entry.getLowerBoundPartitionKey(), entry); + } + } finally { + rwl.writeLock().unlock(); + } + } + + /** + * Clears all non-covered range entries from the cache. + */ + public void clearNonCoveredRangeEntries() { + rwl.writeLock().lock(); + try { + Iterator> it = entries.entrySet().iterator(); + while (it.hasNext()) { + if (it.next().getValue().isNonCoveredRange()) { + it.remove(); + } + } + } finally { + rwl.writeLock().unlock(); + } + } + + @Override + public String toString() { + rwl.readLock().lock(); + try { + return entries.values().toString(); + } finally { + rwl.readLock().unlock(); + } + } + + /** + * An entry in the meta cache. Represents either a non-covered range, or a tablet. + */ + public static class Entry { + /** The remote tablet, only set if this entry represents a tablet. */ + private final RemoteTablet tablet; + /** The lower bound partition key, only set if this is a non-covered range. */ + private final byte[] lowerBoundPartitionKey; + /** The upper bound partition key, only set if this is a non-covered range. */ + private final byte[] upperBoundPartitionKey; + /** Deadline in ns relative the the System nanotime clock. */ + private final long deadline; + + private Entry(RemoteTablet tablet, + byte[] lowerBoundPartitionKey, + byte[] upperBoundPartitionKey, + long deadline) { + this.tablet = tablet; + this.lowerBoundPartitionKey = lowerBoundPartitionKey; + this.upperBoundPartitionKey = upperBoundPartitionKey; + this.deadline = deadline; + } + + public static Entry nonCoveredRange(byte[] lowerBoundPartitionKey, + byte[] upperBoundPartitionKey, + long deadline) { + return new Entry(null, lowerBoundPartitionKey, upperBoundPartitionKey, deadline); + } + + public static Entry tablet(RemoteTablet tablet, long deadline) { + return new Entry(tablet, null, null, deadline); + } + + /** + * @return {@code true} if this entry is a non-covered range. + */ + public boolean isNonCoveredRange() { + return tablet == null; + } + + /** + * @return the {@link RemoteTablet} for this tablet, or null + * if this is a non-covered range. + */ + public RemoteTablet getTablet() { + return tablet; + } + + public byte[] getLowerBoundPartitionKey() { + return tablet == null ? lowerBoundPartitionKey : tablet.getPartition().getPartitionKeyStart(); + } + + public byte[] getUpperBoundPartitionKey() { + return tablet == null ? upperBoundPartitionKey : tablet.getPartition().getPartitionKeyEnd(); + } + + long ttl() { + return TimeUnit.NANOSECONDS.toMillis(deadline - ticker.read()); + } + + public boolean isStale() { + return ttl() <= 0; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(isNonCoveredRange() ? 
"NonCoveredRange" : "Tablet") + .omitNullValues() + .add("lowerBoundPartitionKey", Bytes.hex(getLowerBoundPartitionKey())) + .add("upperBoundPartitionKey", Bytes.hex(getUpperBoundPartitionKey())) + .add("ttl", ttl()) + .add("tablet", tablet) + .toString(); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/TimeoutTracker.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/TimeoutTracker.java new file mode 100644 index 0000000000..0e99c0f39f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/TimeoutTracker.java @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.util.concurrent.TimeUnit; + +import com.google.common.base.Stopwatch; + +/** + * This is a wrapper class around {@link com.google.common.base.Stopwatch} used to track a timeout + * in the future. + *

+ * The watch starts as soon as this object is created with a timeout of 0, which means that + * there's no timeout. + * The timeout has been reached once the stopwatch's elapsed time is equal or greater than the + * provided timeout. + */ +public class TimeoutTracker { + private final Stopwatch stopwatch; + /** timeout in milliseconds **/ + private long timeout = 0; + + /** + * Creates a new tracker, which starts the stopwatch right now. + */ + public TimeoutTracker() { + this(Stopwatch.createUnstarted()); + } + + /** + * Creates a new tracker, using the specified stopwatch, and starts it right now. + * The stopwatch is reset if it was already running. + * @param stopwatch Specific Stopwatch to use + */ + public TimeoutTracker(Stopwatch stopwatch) { + if (stopwatch.isRunning()) { + stopwatch.reset(); + } + this.stopwatch = stopwatch.start(); + } + + /** + * Check if we're already past the timeout. + * @return true if we're past the timeout, otherwise false. Also returns false if no timeout + * was specified + */ + public boolean timedOut() { + if (!hasTimeout()) { + return false; + } + return timeout - stopwatch.elapsed(TimeUnit.MILLISECONDS) <= 0; + } + + /** + * Get the number of milliseconds before the timeout is reached. + *

+ * This method is used to pass down the remaining timeout to the RPCs, so it has special semantics. + * A timeout of 0 is used to indicate an infinite timeout, and negative timeouts are invalid. + * Thus, if the timeout has passed (i.e. timeout - stopwatch.elapsedMillis() <= 0), + * the returned value is floored at 1 (see the sketch below). + *
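A short sketch of these semantics, using only the public API of this class (the sleep durations are illustrative):

    import org.apache.kudu.client.TimeoutTracker;

    public class TimeoutTrackerSketch {
      public static void main(String[] args) throws InterruptedException {
        TimeoutTracker tracker = new TimeoutTracker();
        tracker.setTimeout(50);                              // 50 ms budget for the operation
        Thread.sleep(10);
        long remaining = tracker.getMillisBeforeTimeout();   // roughly 40, never below 1
        System.out.println(remaining);
        Thread.sleep(60);
        System.out.println(tracker.timedOut());              // true: the budget is exhausted
        System.out.println(tracker.getMillisBeforeTimeout()); // floored at 1, so check timedOut() first
      }
    }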

+ * Callers who care about this behavior should first check {@link #timedOut()}. + * + * @return the remaining millis before the timeout is reached, or 1 if the remaining time is + * lesser or equal to 0, or Long.MAX_VALUE if no timeout was specified (in which case it + * should never be called). + * @throws IllegalStateException if this method is called and no timeout was set + */ + public long getMillisBeforeTimeout() { + if (!hasTimeout()) { + throw new IllegalStateException("This tracker doesn't have a timeout set so it cannot " + + "answer getMillisBeforeTimeout()"); + } + long millisBeforeTimeout = timeout - stopwatch.elapsed(TimeUnit.MILLISECONDS); + millisBeforeTimeout = millisBeforeTimeout <= 0 ? 1 : millisBeforeTimeout; + return millisBeforeTimeout; + } + + public long getElapsedMillis() { + return this.stopwatch.elapsed(TimeUnit.MILLISECONDS); + } + + /** + * Tells if a non-zero timeout was set. + * @return true if the timeout is greater than 0, false otherwise. + */ + public boolean hasTimeout() { + return timeout != 0; + } + + /** + * Utility method to check if sleeping for a specified amount of time would put us past the + * timeout. + * @param plannedSleepTimeMillis number of milliseconds for a planned sleep + * @return if the planned sleeps goes past the timeout. + */ + public boolean wouldSleepingTimeoutMillis(long plannedSleepTimeMillis) { + if (!hasTimeout()) { + return false; + } + return getMillisBeforeTimeout() - plannedSleepTimeMillis <= 0; + } + + /** + * Sets the timeout to 0 (no timeout) and restarts the stopwatch from scratch. + */ + public void reset() { + timeout = 0; + stopwatch.reset(); + stopwatch.start(); + } + + /** + * Get the timeout (in milliseconds). + * @return the current timeout + */ + public long getTimeout() { + return timeout; + } + + /** + * Set a new timeout for this tracker. It cannot be smaller than 0, + * and if it is 0 then it means that there is no timeout (which is the default behavior). + * This method won't call reset(). + * @param timeout a number of milliseconds greater or equal to 0 + * @throws IllegalArgumentException if the timeout is lesser than 0 + */ + public void setTimeout(long timeout) { + if (timeout < 0) { + throw new IllegalArgumentException("The timeout must be greater or equal to 0, " + + "the passed value is " + timeout); + } + this.timeout = timeout; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder("TimeoutTracker(timeout="); + buf.append(timeout); + buf.append(", elapsed=").append(stopwatch.elapsed(TimeUnit.MILLISECONDS)); + buf.append(")"); + return buf.toString(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Update.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Update.java new file mode 100644 index 0000000000..267e3144c7 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Update.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Operation to update columns on an existing row. + * Instances of this class should not be reused. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Update extends Operation { + + Update(KuduTable table) { + super(table); + } + + @Override + ChangeType getChangeType() { + return ChangeType.UPDATE; + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/UpdateIgnore.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/UpdateIgnore.java new file mode 100644 index 0000000000..384d3b7456 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/UpdateIgnore.java @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Represents a single row update ignoring missing rows errors and + * errors on updating immutable cells. + * Instances of this class should not be reused. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UpdateIgnore extends Operation { + + UpdateIgnore(KuduTable table) { + super(table); + } + + @Override + ChangeType getChangeType() { + return ChangeType.UPDATE_IGNORE; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Upsert.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Upsert.java new file mode 100644 index 0000000000..a282f813c2 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/Upsert.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Represents a single row upsert. Instances of this class should not be reused. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Upsert extends Operation { + + Upsert(KuduTable table) { + super(table); + } + + @Override + ChangeType getChangeType() { + return ChangeType.UPSERT; + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/UpsertIgnore.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/UpsertIgnore.java new file mode 100644 index 0000000000..c5cff726c7 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/client/UpsertIgnore.java @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Represents a single row upsert ignoring errors on updating + * immutable cells. + * Instances of this class should not be reused. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UpsertIgnore extends Operation { + + UpsertIgnore(KuduTable table) { + super(table); + } + + @Override + ChangeType getChangeType() { + return ChangeType.UPSERT_IGNORE; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/AsyncUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/AsyncUtil.java new file mode 100644 index 0000000000..3bfacff36e --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/AsyncUtil.java @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Utility methods for various parts of async, such as Deferred. + * TODO (KUDU-602): Some of these methods could eventually be contributed back to async or to a + * custom fork/derivative of async. + */ +@InterfaceAudience.Private +public class AsyncUtil { + + /** Non-constructable utility class. */ + private AsyncUtil() { + } + + /** + * Register a callback and an "errback". + *

+   * This has the exact same effect as {@link Deferred#addCallbacks(Callback, Callback)}
+   * but keeps the type information "correct" when the callback and errback return a
+   * {@code Deferred}.
+   * @param d The {@code Deferred} we want to add the callback and errback to.
+   * @param cb The callback to register.
+   * @param eb The errback to register.
+   * @return {@code d} with an "updated" type.
+   */
+  @SuppressWarnings("unchecked")
+  public static <T, R, D extends Deferred<R>, E>
+      Deferred<R> addCallbacksDeferring(final Deferred<T> d,
+                                        final Callback<D, T> cb,
+                                        final Callback<D, E> eb) {
+    return d.addCallbacks((Callback<R, T>) cb, eb);
+  }
+
+  /**
+   * Workaround for {@link Deferred#addBoth}'s failure to use generics correctly. Allows callers
+   * to provide a {@link Callback} which takes an {@link Object} instead of the type of the deferred
+   * it is applied to, which avoids a runtime {@link ClassCastException} when the deferred fails.
+   */
+  @SuppressWarnings("unchecked")
+  public static <T> Deferred<T> addBoth(final Deferred<T> deferred,
+                                        final Callback<T, Object> callback) {
+    return ((Deferred<Object>) deferred).addBoth(callback);
+  }
+
+  /**
+   * Workaround for {@link Deferred#addBothDeferring}'s failure to use generics correctly. Allows
+   * callers to provide a {@link Callback} which takes an {@link Object} instead of the type of the
+   * deferred it is applied to, which avoids a runtime {@link ClassCastException} when the deferred
+   * fails.
+   */
+  @SuppressWarnings("unchecked")
+  public static <T> Deferred<T> addBothDeferring(final Deferred<T> deferred,
+                                                 final Callback<Deferred<T>, Object> callback) {
+    return ((Deferred<Object>) deferred).addBothDeferring(callback);
+  }
+}
diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/BloomFilter.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/BloomFilter.java
new file mode 100644
index 0000000000..703dbf5ec3
--- /dev/null
+++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/BloomFilter.java
@@ -0,0 +1,409 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.kudu.util;
+
+import java.nio.charset.StandardCharsets;
+import java.util.BitSet;
+import javax.annotation.concurrent.NotThreadSafe;
+
+import com.google.common.base.Preconditions;
+import com.sangupta.murmur.Murmur2;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+
+/**
+ * A space-efficient filter which offers an approximate containment check.
+ *
+ *

It can be used to keep all the records that are wanted, but it does not guarantee to filter out + * all the records that are not wanted (false positives are possible). + * +

Please check this wiki for more details. + * + *

The {@code BloomFilter} here is a scanning filter, used to constrain the number of records + * returned from the TServer. It provides different types of {@code put} methods. When you {@code put} a + * record into the {@code BloomFilter}, it means you expect the TServer to return records with + * the same value in a scan. + * +

Here is an example of typical use: +

+ * {@code
+ *   BloomFilter bf = BloomFilter.bySize(numBytes);
+ *   bf.put(1);
+ *   bf.put(3);
+ *   bf.put(4);
+ *   byte[] bitSet = bf.getBitSet();
+ *   int numHashes = bf.getNumHashes();
+ *   String hashFunctionName = bf.getHashFunctionName();
+ *   // TODO: implement the interface for serializing and sending
+ *   // (bitSet, numHashes, hashFunctionName) to TServer.
+ * }
+ * 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +@NotThreadSafe +public class BloomFilter { + + private final BitSet bitSet; + private final int numHashes; + private final byte[] byteBuffer; + private final HashFunction hashFunction; + private static final double DEFAULT_FP_RATE = 0.01; + + private BloomFilter(BitSet bitSet, int numHashes, HashFunction hashFunction) { + Preconditions.checkArgument(bitSet.size() >= 8, "Number of bits in " + + "bitset should be at least 8, but found %s.", bitSet.size()); + this.bitSet = bitSet; + this.numHashes = numHashes; + this.hashFunction = hashFunction; + byteBuffer = new byte[8]; + } + + /** + * Generate bloom filter, default hashing is {@code Murmur2} and false positive rate is 0.01. + * @param numBytes size of bloom filter in bytes + */ + public static BloomFilter bySize(int numBytes) { + return bySizeAndFPRate(numBytes, DEFAULT_FP_RATE); + } + + /** + * Generate bloom filter, default hashing is {@code Murmur2}. + * @param numBytes size of bloom filter in bytes + * @param fpRate the probability that TServer will erroneously return a record that has not + * ever been {@code put} into the {@code BloomFilter}. + */ + public static BloomFilter bySizeAndFPRate(int numBytes, double fpRate) { + return bySizeAndFPRate(numBytes, fpRate, HashFunctions.MURMUR2); + } + + /** + * Generate bloom filter. + * @param numBytes size of bloom filter in bytes + * @param fpRate the probability that TServer will erroneously return a record that has not + * ever been {@code put} into the {@code BloomFilter}. + * @param hashFunction hashing used when updating or checking containment, user should pick + * the hashing function from {@code HashFunctions} + */ + public static BloomFilter bySizeAndFPRate(int numBytes, double fpRate, + HashFunction hashFunction) { + int numBits = numBytes * 8; + int numHashes = computeOptimalHashCount(numBits, optimalExpectedCount(numBytes, fpRate)); + return new BloomFilter(new BitSet(numBits), numHashes, hashFunction); + } + + /** + * Generate bloom filter, default hashing is {@code Murmur2} and false positive rate is 0.01. + * @param expectedCount The expected number of elements, targeted by this bloom filter. + * It is used to size the bloom filter. + */ + public static BloomFilter byCount(int expectedCount) { + return byCountAndFPRate(expectedCount, DEFAULT_FP_RATE); + } + + /** + * Generate bloom filter, default hashing is {@code Murmur2}. + * @param expectedCount The expected number of elements, targeted by this bloom filter. + * It is used to size the bloom filter. + * @param fpRate the probability that TServer will erroneously return a record that has not + * ever been {@code put} into the {@code BloomFilter}. + */ + public static BloomFilter byCountAndFPRate(int expectedCount, double fpRate) { + return byCountAndFPRate(expectedCount, fpRate, HashFunctions.MURMUR2); + } + + /** + * Generate bloom filter. + * @param expectedCount The expected number of elements, targeted by this bloom filter. + * It is used to size the bloom filter. + * @param fpRate the probability that TServer will erroneously return a record that has not + * ever been {@code put} into the {@code BloomFilter}. 
+ * @param hashFunction hashing used when updating or checking containment, user should pick + * the hashing function from {@code HashFunctions} + */ + public static BloomFilter byCountAndFPRate( + int expectedCount, double fpRate, HashFunction hashFunction) { + int numBytes = optimalNumOfBytes(expectedCount, fpRate); + int numBits = numBytes * 8; + int numHashes = computeOptimalHashCount(numBits, expectedCount); + return new BloomFilter(new BitSet(numBits), numHashes, hashFunction); + } + + /** + * Update bloom filter with a {@code byte[]}. + */ + public void put(byte[] data) { + updateBitset(data, data.length); + } + + /** + * Update bloom filter with a {@code boolean}. + */ + public void put(boolean data) { + byteBuffer[0] = (byte)(data ? 1 : 0); + updateBitset(byteBuffer, 1); + } + + /** + * Update bloom filter with a {@code byte}. + */ + public void put(byte data) { + byteBuffer[0] = data; + updateBitset(byteBuffer, 1); + } + + /** + * Update bloom filter with a {@code short}. + */ + public void put(short data) { + byteBuffer[0] = (byte) (data >>> 0); + byteBuffer[1] = (byte) (data >>> 8); + updateBitset(byteBuffer, 2); + } + + /** + * Update bloom filter with a {@code int}. + */ + public void put(int data) { + byteBuffer[0] = (byte) (data >>> 0); + byteBuffer[1] = (byte) (data >>> 8); + byteBuffer[2] = (byte) (data >>> 16); + byteBuffer[3] = (byte) (data >>> 24); + updateBitset(byteBuffer, 4); + } + + /** + * Update bloom filter with a {@code long}. + */ + public void put(long data) { + byteBuffer[0] = (byte) (data >>> 0); + byteBuffer[1] = (byte) (data >>> 8); + byteBuffer[2] = (byte) (data >>> 16); + byteBuffer[3] = (byte) (data >>> 24); + byteBuffer[4] = (byte) (data >>> 32); + byteBuffer[5] = (byte) (data >>> 40); + byteBuffer[6] = (byte) (data >>> 48); + byteBuffer[7] = (byte) (data >>> 56); + updateBitset(byteBuffer, 8); + } + + /** + * Update bloom filter with a {@code float}. + */ + public void put(float data) { + put(Float.floatToIntBits(data)); + } + + /** + * Update bloom filter with a {@code double}. + */ + public void put(double data) { + put(Double.doubleToLongBits(data)); + } + + /** + * Update bloom filter with a {@code String}. + */ + public void put(String data) { + put(data.getBytes(StandardCharsets.UTF_8)); + } + + /** + * Get the internal bit set in bytes. + */ + public byte[] getBitSet() { + return bitSet.toByteArray(); + } + + /** + * Get the number of hashing times when updating or checking containment. + */ + public int getNumHashes() { + return numHashes; + } + + /** + * Get the name of hashing used when updating or checking containment. + */ + public String getHashFunctionName() { + return hashFunction.toString(); + } + + // Mark it `private` and user can only use the `HashFunction` specified in the + // enumeration below. Thus user cannot send TServer a self defined `HashFunction`, + // which might not be identified by TServer. + private interface HashFunction { + long hash(byte[] data, int length, long seed); + } + + /** + * Hashing functions used when updating or checking containment for a bloom filter. + * Currently the only choice is {@code Murmur2}, but we can consider to add more hashing + * functions in the future. 
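A small self-contained sketch of building a filter sized by expected count and probing it (the values are illustrative); it mirrors the javadoc example earlier in this file:

    import org.apache.kudu.util.BloomFilter;

    public class BloomFilterSketch {
      public static void main(String[] args) {
        // Size for roughly 1000 expected keys at the default 1% false-positive rate.
        BloomFilter bf = BloomFilter.byCount(1000);
        bf.put(42L);
        bf.put("tablet-0042");
        System.out.println(bf.mayContain(42L));           // true: no false negatives
        System.out.println(bf.mayContain("tablet-9999"));  // usually false; may be a false positive
        System.out.println(bf.getNumHashes() + " hashes, " + bf.getBitSet().length + " bytes");
      }
    }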
+ */ + public enum HashFunctions implements HashFunction { + MURMUR2() { + @Override + public long hash(byte[] data, int length, long seed) { + return Murmur2.hash(data, length, seed); + } + + @Override + public String toString() { + return "Murmur2"; + } + } + } + + private void updateBitset(byte[] byteBuffer, int length) { + Preconditions.checkArgument(byteBuffer.length >= length); + long h = Murmur2.hash64(byteBuffer, length, 0); + long h1 = (0xFFFFFFFFL & h); + long h2 = (h >>> 32); + long tmp = h1; + for (int i = 0; i < numHashes; i++) { + long bitPos = tmp % bitSet.size(); + bitSet.set((int)bitPos); + tmp += h2; + } + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(byte[] data) { + return checkIfContains(data); + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(boolean data) { + byte[] byteBuffer = new byte[1]; + if (data) { + byteBuffer[0] = 1; + } else { + byteBuffer[0] = 0; + } + return checkIfContains(byteBuffer); + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(byte data) { + byte[] byteBuffer = new byte[1]; + byteBuffer[0] = data; + return checkIfContains(byteBuffer); + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(short data) { + byte[] byteBuffer = new byte[2]; + byteBuffer[0] = (byte) (data >>> 0); + byteBuffer[1] = (byte) (data >>> 8); + return checkIfContains(byteBuffer); + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(int data) { + byte[] byteBuffer = new byte[4]; + byteBuffer[0] = (byte) (data >>> 0); + byteBuffer[1] = (byte) (data >>> 8); + byteBuffer[2] = (byte) (data >>> 16); + byteBuffer[3] = (byte) (data >>> 24); + return checkIfContains(byteBuffer); + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(long data) { + byte[] byteBuffer = new byte[8]; + byteBuffer[0] = (byte) (data >>> 0); + byteBuffer[1] = (byte) (data >>> 8); + byteBuffer[2] = (byte) (data >>> 16); + byteBuffer[3] = (byte) (data >>> 24); + byteBuffer[4] = (byte) (data >>> 32); + byteBuffer[5] = (byte) (data >>> 40); + byteBuffer[6] = (byte) (data >>> 48); + byteBuffer[7] = (byte) (data >>> 56); + return checkIfContains(byteBuffer); + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(float data) { + return mayContain(Float.floatToIntBits(data)); + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(double data) { + return mayContain(Double.doubleToLongBits(data)); + } + + @InterfaceAudience.LimitedPrivate("Test") + public boolean mayContain(String data) { + return mayContain(data.getBytes(StandardCharsets.UTF_8)); + } + + private boolean checkIfContains(byte[] bytes) { + long h = Murmur2.hash64(bytes, bytes.length, 0); + + long h1 = (0xFFFFFFFFL & h); + long h2 = (h >>> 32); + long tmp = h1; + int remHashes = numHashes; + while (remHashes != 0) { + long bitPos = tmp % bitSet.size(); + if (!bitSet.get((int)bitPos)) { + return false; + } + tmp += h2; + remHashes--; + } + return true; + } + + private static double kNaturalLog2 = 0.69314; + + private static int optimalNumOfBytes(int expectedCount, double fpRate) { + if (fpRate == 0) { + fpRate = Double.MIN_VALUE; + } + return (int) Math.ceil(-expectedCount * Math.log(fpRate) / (Math.log(2) * Math.log(2) * 8)); + } + + private static int optimalExpectedCount(int numBytes, double fpRate) { + int numBits = numBytes * 8; + return (int) Math.ceil(-numBits * kNaturalLog2 * kNaturalLog2 / Math.log(fpRate)); + } + + private static int 
computeOptimalHashCount(int numBits, int elems) { + int numHashes = (int)(numBits * kNaturalLog2 / elems); + if (numHashes < 1) { + numHashes = 1; + } + return numHashes; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("BloomFilter(nBits="); + sb.append(bitSet.size()); + sb.append(", numHashes="); + sb.append(numHashes); + sb.append(", hashing="); + sb.append(hashFunction); + sb.append(")"); + return sb.toString(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/ByteVec.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/ByteVec.java new file mode 100644 index 0000000000..20338571fc --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/ByteVec.java @@ -0,0 +1,307 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import java.util.Arrays; +import java.util.List; +import javax.annotation.concurrent.NotThreadSafe; + +import com.google.common.base.Preconditions; +import com.google.common.io.BaseEncoding; +import com.google.common.primitives.Bytes; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * A vector of primitive bytes. + * + * The vector is backed by a contiguous array, and offers efficient random + * access. + */ +@InterfaceAudience.Private +@NotThreadSafe +public final class ByteVec implements Cloneable { + + /** Default initial capacity for new vectors. */ + @InterfaceAudience.LimitedPrivate("Test") + static final int DEFAULT_CAPACITY = 32; + + /** data backing the vector. */ + private byte[] data; + + /** offset of first unused element in data. */ + private int len; + + private ByteVec(int capacity) { + data = new byte[capacity]; + len = 0; + } + + private ByteVec(byte[] data) { + this.data = data; + this.len = data.length; + } + + /** + * Creates a new vector. + * @return the new vector. + */ + public static ByteVec create() { + return new ByteVec(DEFAULT_CAPACITY); + } + + /** + * Creates a new vector with the specified capacity. + * @param capacity the initial capacity of the vector + * @return a new vector with the specified capacity + */ + public static ByteVec withCapacity(int capacity) { + return new ByteVec(capacity); + } + + /** + * Wrap an existing array with a vector. + * The array should not be modified after this call. + * @param data the initial data for the vector + * @return a vector wrapping the data + */ + public static ByteVec wrap(byte[] data) { + return new ByteVec(data); + } + + /** Returns the number of elements the vector can hold without reallocating. */ + public int capacity() { + return data.length; + } + + /** Returns the primitive array backing the vector. The caller should not modify the array. 
*/ + public byte[] data() { + return data; + } + + /** Returns the number of elements in the vector. */ + public int len() { + return len; + } + + /** Returns {@code true} if the vector is empty. */ + public boolean isEmpty() { + return len == 0; + } + + /** + * Reserves capacity for at least {@code additional} more elements to be + * inserted into the vector. + * + * The vector may reserve more space to avoid frequent reallocations. If the + * vector already has sufficient capacity, no reallocation will happen. + * + * @param additional capacity to reserve + */ + public void reserveAdditional(int additional) { + Preconditions.checkArgument(additional >= 0, "negative additional"); + if (data.length - len >= additional) { + return; + } + // Use a 1.5x growth factor. According to + // https://stackoverflow.com/questions/1100311/what-is-the-ideal-growth-rate-for-a-dynamically-allocated-array + // this is close to the ideal ratio, although it isn't clear if that holds + // for managed languages. + data = Arrays.copyOf(data, Math.max(len + additional, + data.length + data.length / 2)); + } + + /** + * Reserves capacity for exactly {@code additional} more elements to be + * inserted into the vector. + * + * If the vector already has sufficient capacity, no reallocation will happen. + * + * @param additional capacity to reserve + */ + public void reserveExact(int additional) { + Preconditions.checkArgument(additional >= 0, "negative additional"); + if (data.length - len >= additional) { + return; + } + data = Arrays.copyOf(data, len + additional); + } + + /** + * Shrink the capacity of the vector to match the length. + */ + public void shrinkToFit() { + if (len < data.length) { + data = Arrays.copyOf(data, len); + } + } + + /** + * Shorten the vector to be {@code len} elements long. + * If {@code len} is greater than the vector's current length, + * this has no effect. + * @param len the new length of the vector + */ + public void truncate(int len) { + Preconditions.checkArgument(len >= 0, "negative len"); + this.len = Math.min(this.len, len); + } + + /** + * Removes all elements from the vector. + * No reallocation will be performed. + */ + public void clear() { + truncate(0); + } + + /** + * Appends an element to the vector. + * @param element the element to append + */ + public void push(byte element) { + reserveAdditional(1); + data[len++] = element; + } + + /** + * Sets the element at {@code index} to the provided value. + * @param index of the element to set + * @param value to set the element to + * @throws IndexOutOfBoundsException if {@code index} is not valid + */ + public void set(int index, byte value) { + if (index >= len) { + throw new IndexOutOfBoundsException(String.format("index: %s, len: %s", index, len)); + } + data[index] = value; + } + + /** + * Appends the bytes from another byte array to this vec. + * @param values the values to append + * @param offset the offset into {@code values} to append from + * @param len the number of bytes from {@code values} to append + */ + public void append(byte[] values, int offset, int len) { + reserveAdditional(len); + System.arraycopy(values, offset, this.data, this.len, len); + this.len += len; + } + + /** + * Appends all of the bytes from another byte array to this vec. + * @param values the values to append + */ + public void append(byte[] values) { + append(values, 0, values.length); + } + + /** + * Concatenates another vector onto the end of this one. 
+ * @param other the other vector to concatenate onto this one + */ + public void append(ByteVec other) { + append(other.data, 0, other.len); + } + + /** + * Returns the element at the specified position. + * @param index of the element to return + * @return the element at the specified position + * @throws IndexOutOfBoundsException if the index is out of range + */ + public byte get(int index) { + if (index >= len) { + throw new IndexOutOfBoundsException(String.format("index: %s, len: %s", index, len)); + } + return data[index]; + } + + /** + * Returns a list view of the vector. + * The vector should not be concurrently modified while the list is in use. + * @return a list view of the vector + */ + public List asList() { + List list = Bytes.asList(data); + if (len < data.length) { + return list.subList(0, len); + } + return list; + } + + /** + * @return a copy of the vector as a byte[]. + */ + public byte[] toArray() { + return Arrays.copyOf(data, len); + } + + /** {@inheritDoc} */ + @Override + public String toString() { + if (len == 0) { + return "[]"; + } + + StringBuilder builder = new StringBuilder(4 + len * 2); + builder.append("[0x"); + builder.append(BaseEncoding.base16().encode(data, 0, len)); + builder.append(']'); + return builder.toString(); + } + + /** {@inheritDoc} */ + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ByteVec other = (ByteVec) o; + if (len != other.len) { + return false; + } + for (int i = 0; i < len; i++) { + if (data[i] != other.data[i]) { + return false; + } + } + return true; + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + int result = len; + for (int i = 0; i < len; i++) { + result = 31 * result + data[i]; + } + return result; + } + + /** {@inheritDoc} */ + @Override + public ByteVec clone() { + ByteVec clone = ByteVec.withCapacity(data.length); + clone.append(this); + return clone; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/CharUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/CharUtil.java new file mode 100644 index 0000000000..e08879b4d1 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/CharUtil.java @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.ColumnTypeAttributes; + +@InterfaceAudience.Private +public class CharUtil { + public static final int MIN_VARCHAR_LENGTH = 1; + public static final int MAX_VARCHAR_LENGTH = 65535; + + /** Non-constructable utility class. 
*/ + private CharUtil() { + } + + /** + * Convenience method to create column type attributes for VARCHAR columns. + * @param length the length. + * @return the column type attributes. + */ + public static ColumnTypeAttributes typeAttributes(int length) { + return new ColumnTypeAttributes.ColumnTypeAttributesBuilder() + .length(length) + .build(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DataGenerator.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DataGenerator.java new file mode 100644 index 0000000000..b20010cdce --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DataGenerator.java @@ -0,0 +1,252 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.sql.Date; +import java.util.Base64; +import java.util.List; +import java.util.Random; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.PartialRow; + +/** + * A utility class to generate random data and rows. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class DataGenerator { + + private final Random random; + private final int stringLength; + private final int binaryLength; + private final float nullRate; + private final float defaultRate; + + private DataGenerator(final Random random, + final int stringLength, + final int binaryLength, + final float nullRate, + final float defaultRate) { + this.random = random; + this.stringLength = stringLength; + this.binaryLength = binaryLength; + this.nullRate = nullRate; + this.defaultRate = defaultRate; + } + + /** + * Randomizes the fields in a given PartialRow. + * @param row the PartialRow to randomize. + */ + public void randomizeRow(PartialRow row) { + this.randomizeRow(row, true); + } + + /** + * Randomizes the fields in a given PartialRow. + * @param row the PartialRow to randomize. + * @param randomizeKeys true if the key columns should be randomized. + */ + public void randomizeRow(PartialRow row, boolean randomizeKeys) { + Schema schema = row.getSchema(); + List columns = schema.getColumns(); + for (int i = 0; i < columns.size(); i++) { + ColumnSchema col = columns.get(i); + if (col.isKey() && !randomizeKeys) { + continue; + } + Type type = col.getType(); + if (col.isNullable() && random.nextFloat() <= nullRate) { + // Sometimes set nullable columns to null. 
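+ // nullRate is the probability, between 0 and 1, that a nullable column is set to null.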
+ row.setNull(i); + continue; + } + if (col.getDefaultValue() != null && !col.isKey() && random.nextFloat() <= defaultRate) { + // Sometimes use the column default value. + continue; + } + switch (type) { + // TODO(ghenke): Support range bound configuration. + case BOOL: + row.addBoolean(i, random.nextBoolean()); + break; + case INT8: + row.addByte(i, (byte) random.nextInt()); + break; + case INT16: + row.addShort(i, (short) random.nextInt()); + break; + case INT32: + row.addInt(i, random.nextInt()); + break; + case DATE: + row.addDate(i, randomDate(random)); + break; + case INT64: + case UNIXTIME_MICROS: + row.addLong(i, random.nextLong()); + break; + case FLOAT: + row.addFloat(i, random.nextFloat()); + break; + case DOUBLE: + row.addDouble(i, random.nextDouble()); + break; + case DECIMAL: + row.addDecimal(i, randomDecimal(col.getTypeAttributes(), random)); + break; + case VARCHAR: + row.addVarchar(i, randomString(Math.min(col.getTypeAttributes().getLength(), + stringLength), random)); + break; + case STRING: + row.addString(i, randomString(stringLength, random)); + break; + case BINARY: + row.addBinary(i, randomBinary(binaryLength, random)); + break; + default: + throw new UnsupportedOperationException("Unsupported type " + type); + } + } + } + + /** + * Utility method to return a random integer value which can be converted into + * correct Kudu Date value + */ + public static Date randomDate(Random random) { + final int bound = DateUtil.MAX_DATE_VALUE - DateUtil.MIN_DATE_VALUE + 1; + int days = random.nextInt(bound) + DateUtil.MIN_DATE_VALUE; + return DateUtil.epochDaysToSqlDate(days); + } + + /** + * Utility method to return a random decimal value. + */ + public static BigDecimal randomDecimal(ColumnTypeAttributes attributes, Random random) { + int numBits = BigInteger.TEN.pow(attributes.getPrecision()) + .subtract(BigInteger.ONE).bitCount(); + BigInteger randomUnscaled = new BigInteger(numBits, random); + return new BigDecimal(randomUnscaled, attributes.getScale()); + } + + /** + * Utility method to return a random string value. + */ + public static String randomString(int length, Random random) { + byte[] bytes = new byte[length]; + random.nextBytes(bytes); + return Base64.getEncoder().encodeToString(bytes); + } + + /** + * Utility method to return a random binary value. + */ + public static byte[] randomBinary(int length, Random random) { + byte[] bytes = new byte[length]; + random.nextBytes(bytes); + return bytes; + } + + /** + * A builder to configure and construct a DataGenerator instance. + */ + public static class DataGeneratorBuilder { + + private Random random = new Random(System.currentTimeMillis()); + private int stringLength = 128; + private int binaryLength = 128; + private float nullRate = 0.1f; + private float defaultRate = 0.1f; + + public DataGeneratorBuilder() { + } + + /** + * Define a custom Random instance to use for any random generation. + * @return this instance + */ + public DataGeneratorBuilder random(Random random) { + this.random = random; + return this; + } + + /** + * Define the length of the data when randomly generating column values for string columns. + * @return this instance + */ + public DataGeneratorBuilder stringLength(int stringLength) { + this.stringLength = stringLength; + return this; + } + + /** + * Define the length of the data when randomly generating column values for binary columns. 
+ * @return this instance + */ + public DataGeneratorBuilder binaryLength(int binaryLength) { + this.binaryLength = binaryLength; + return this; + } + + /** + * Define the rate at which null values should be used when randomly generating + * column values. + * @return this instance + */ + public DataGeneratorBuilder nullRate(float nullRate) { + Preconditions.checkArgument(nullRate >= 0f && nullRate <= 1f, + "nullRate must be between 0 and 1"); + this.nullRate = nullRate; + return this; + } + + /** + * Define the rate at which default values should be used when randomly generating + * column values. + * @return this instance + */ + public DataGeneratorBuilder defaultRate(float defaultRate) { + Preconditions.checkArgument(defaultRate >= 0f && defaultRate <= 1f, + "defaultRate must be between 0 and 1"); + this.defaultRate = defaultRate; + return this; + } + + public DataGenerator build() { + return new DataGenerator( + random, + stringLength, + binaryLength, + nullRate, + defaultRate + ); + } + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DateUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DateUtil.java new file mode 100644 index 0000000000..f71da3e13e --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DateUtil.java @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import java.sql.Date; +import java.time.LocalDate; + +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class DateUtil { + public static final int MIN_DATE_VALUE = + (int)LocalDate.parse("0001-01-01").toEpochDay(); // -719162 + public static final int MAX_DATE_VALUE = + (int)LocalDate.parse("9999-12-31").toEpochDay(); // 2932896 + + /** Non-constructable utility class. */ + private DateUtil() { + } + + /** + * Check whether the date is within the range '0001-01-01':'9999-12-31' + * + * @param days the number days since the Unix epoch + */ + public static void checkDateWithinRange(long days) { + if (days < MIN_DATE_VALUE || days > MAX_DATE_VALUE) { + throw new IllegalArgumentException( + "Date value <" + days + ">} is out of range '0001-01-01':'9999-12-31'"); + } + } + + /** + * Converts a {@link java.sql.Date} to the number of days since the Unix epoch + * (1970-01-01T00:00:00Z). + * + * @param date the date to convert to days + * @return the number days since the Unix epoch + */ + public static int sqlDateToEpochDays(Date date) { + long days = date.toLocalDate().toEpochDay(); + checkDateWithinRange(days); + return (int)days; + } + + /** + * Converts a number of days since the Unix epoch to a {@link java.sql.Date}. 
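+ * For example, day 0 corresponds to 1970-01-01; values outside -719162 ('0001-01-01') to 2932896 ('9999-12-31') are rejected.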
+ * + * @param days the number of days since the Unix epoch + * @return the corresponding Date + */ + public static Date epochDaysToSqlDate(int days) { + checkDateWithinRange(days); + return Date.valueOf(LocalDate.ofEpochDay(days)); + } + + /** + * Transforms a number of days since the Unix epoch into a string according the ISO-8601 format. + * + * @param days the number of days since the Unix epoch + * @return a string, in the format: YYYY-MM-DD + */ + public static String epochDaysToDateString(int days) { + return LocalDate.ofEpochDay(days).toString(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DecimalUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DecimalUtil.java new file mode 100644 index 0000000000..a3a9871001 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/DecimalUtil.java @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import static org.apache.kudu.Common.DataType; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.RoundingMode; + +import com.google.common.base.Strings; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.ColumnTypeAttributes; + +@InterfaceAudience.Private +public class DecimalUtil { + public static final int MAX_DECIMAL32_PRECISION = 9; + public static final int MAX_UNSCALED_DECIMAL32 = 999999999; + public static final int MIN_UNSCALED_DECIMAL32 = -MAX_UNSCALED_DECIMAL32; + public static final int DECIMAL32_SIZE = 32 / Byte.SIZE; + + public static final int MAX_DECIMAL64_PRECISION = 18; + public static final long MAX_UNSCALED_DECIMAL64 = 999999999999999999L; + public static final long MIN_UNSCALED_DECIMAL64 = -MAX_UNSCALED_DECIMAL64; + public static final int DECIMAL64_SIZE = 64 / Byte.SIZE; + + public static final int MAX_DECIMAL128_PRECISION = 38; + public static final BigInteger MAX_UNSCALED_DECIMAL128 = + new BigInteger(Strings.repeat("9", MAX_DECIMAL128_PRECISION)); + public static final BigInteger MIN_UNSCALED_DECIMAL128 = MAX_UNSCALED_DECIMAL128.negate(); + public static final int DECIMAL128_SIZE = 128 / Byte.SIZE; + + public static final int MIN_DECIMAL_PRECISION = 1; + public static final int MAX_DECIMAL_PRECISION = MAX_DECIMAL128_PRECISION; + + /** Non-constructable utility class. */ + private DecimalUtil() { + } + + /** + * Given a precision, returns the size of the Decimal in Bytes. + * @return the size in Bytes. 
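+ * For example, precisions 1-9 fit in 4 bytes, 10-18 in 8 bytes, and 19-38 in 16 bytes.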
+ */ + public static int precisionToSize(int precision) { + if (precision <= MAX_DECIMAL32_PRECISION) { + return DECIMAL32_SIZE; + } else if (precision <= MAX_DECIMAL64_PRECISION) { + return DECIMAL64_SIZE; + } else if (precision <= MAX_DECIMAL128_PRECISION) { + return DECIMAL128_SIZE; + } else { + throw new IllegalArgumentException("Unsupported decimal type precision: " + precision); + } + } + + /** + * Given a precision, returns the smallest unscaled data type. + * @return the smallest valid DataType. + */ + public static DataType precisionToDataType(int precision) { + if (precision <= MAX_DECIMAL32_PRECISION) { + return DataType.DECIMAL32; + } else if (precision <= MAX_DECIMAL64_PRECISION) { + return DataType.DECIMAL64; + } else if (precision <= MAX_DECIMAL128_PRECISION) { + return DataType.DECIMAL128; + } else { + throw new IllegalArgumentException("Unsupported decimal type precision: " + precision); + } + } + + /** + * Returns the maximum value of a Decimal give a precision and scale. + * @param precision the precision of the decimal. + * @param scale the scale of the decimal. + * @return the maximum decimal value. + */ + public static BigDecimal maxValue(int precision, int scale) { + String maxPrecision = Strings.repeat("9", precision); + return new BigDecimal(new BigInteger(maxPrecision), scale); + } + + /** + * Returns the minimum value of a Decimal give a precision and scale. + * @param precision the precision of the decimal. + * @param scale the scale of the decimal. + * @return the minimum decimal value. + */ + public static BigDecimal minValue(int precision, int scale) { + return maxValue(precision, scale).negate(); + } + + /** + * Returns the smallest value of a Decimal give a precision and scale. + * This value can be useful for incrementing a Decimal. + * @param scale the scale of the decimal. + * @return the smallest decimal value. + */ + public static BigDecimal smallestValue(int scale) { + return new BigDecimal(BigInteger.ONE, scale); + } + + /** + * Attempts to coerce a big decimal to a target precision and scale and + * returns the result. Throws an {@link IllegalArgumentException} if the value + * can't be coerced without rounding or exceeding the targetPrecision. + * + * @param val the BigDecimal value to coerce. + * @param targetPrecision the target precision of the coerced value. + * @param targetScale the target scale of the coerced value. + * @return the coerced BigDecimal value. + */ + public static BigDecimal coerce(BigDecimal val, int targetPrecision, int targetScale) { + if (val.scale() != targetScale) { + try { + val = val.setScale(targetScale, RoundingMode.UNNECESSARY); + } catch (ArithmeticException ex) { + throw new IllegalArgumentException("Value scale " + val.scale() + + " can't be coerced to target scale " + targetScale + ". "); + } + } + if (val.precision() > targetPrecision) { + throw new IllegalArgumentException("Value precision " + val.precision() + + " (after scale coercion) can't be coerced to target precision " + + targetPrecision + ". "); + } + return val; + } + + /** + * Convenience method to create column type attributes for decimal columns. + * @param precision the precision. + * @param scale the scale. + * @return the column type attributes. 
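+ * For example, {@code typeAttributes(18, 2)} describes a DECIMAL(18, 2) column.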
+ */ + public static ColumnTypeAttributes typeAttributes(int precision, int scale) { + return new ColumnTypeAttributes.ColumnTypeAttributesBuilder() + .precision(precision) + .scale(scale) + .build(); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/HashUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/HashUtil.java new file mode 100644 index 0000000000..10ecdc4d94 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/HashUtil.java @@ -0,0 +1,148 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +/** + * Hash utility functions. + */ +public class HashUtil { + // Constants imported from Apache Impala used to compute hash values for special cases. + // They are arbitrary constant obtained by taking lower bytes of generated UUID. Helps + // distinguish NULL values and zero-length objects like empty strings. + // Impala uses the direct BlockBloomFilter C++ API and inserts hash value directly using + // its own implementation of the Fast hash. Hence the value must match with Impala. + // Though Impala will use C++ API, keeping the implementation of the Fast hash algorithm + // consistent across C++ and Java. + private static final int HASH_VAL_NULL = 0x58081667; + private static final byte[] HASH_VAL_NULL_BYTE_BUF = new byte[4]; + + private static final int HASH_VAL_EMPTY = 0x7dca7eee; + private static final byte[] HASH_VAL_EMPTY_BYTE_BUF = new byte[4]; + + static { + HASH_VAL_NULL_BYTE_BUF[0] = (byte) (HASH_VAL_NULL >>> 0); + HASH_VAL_NULL_BYTE_BUF[1] = (byte) (HASH_VAL_NULL >>> 8); + HASH_VAL_NULL_BYTE_BUF[2] = (byte) (HASH_VAL_NULL >>> 16); + HASH_VAL_NULL_BYTE_BUF[3] = (byte) (HASH_VAL_NULL >>> 24); + + HASH_VAL_EMPTY_BYTE_BUF[0] = (byte) (HASH_VAL_EMPTY >>> 0); + HASH_VAL_EMPTY_BYTE_BUF[1] = (byte) (HASH_VAL_EMPTY >>> 8); + HASH_VAL_EMPTY_BYTE_BUF[2] = (byte) (HASH_VAL_EMPTY >>> 16); + HASH_VAL_EMPTY_BYTE_BUF[3] = (byte) (HASH_VAL_EMPTY >>> 24); + } + + /** Non-constructable utility class. */ + private HashUtil() { + } + + /** + * Compute 64-bit FastHash of the supplied data backed by byte array. + * + * FastHash is simple, robust, and efficient general-purpose hash function from Google. + * Implementation is adapted from https://code.google.com/archive/p/fast-hash/ + * + * Adds special handling for null input. + * + * @param buf the data to hash + * @param seed seed to compute the hash + * @return computed 64-bit hash value + */ + public static long fastHash64(byte[] buf, long seed) { + // Special handling for null input with possible non-zero length as could be the + // case with nullable column values. 
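+ // The HASH_VAL_NULL / HASH_VAL_EMPTY sentinels keep null and zero-length inputs distinguishable and match Impala's handling.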
+ if (buf == null) { + buf = HASH_VAL_NULL_BYTE_BUF; + } else if (buf.length == 0) { + buf = HASH_VAL_EMPTY_BYTE_BUF; + } + final int len = buf.length; + final long m = 0x880355f21e6d1965L; + long h = seed ^ (len * m); + long v; + + int len8 = len / 8; + for (int i = 0; i < len8; ++i) { + int pos = i * 8; + v = (buf[pos] & 0xFF) | + ((long)(buf[pos + 1] & 0xFF) << 8) | ((long)(buf[pos + 2] & 0xFF) << 16) | + ((long)(buf[pos + 3] & 0xFF) << 24) | ((long)(buf[pos + 4] & 0xFF) << 32) | + ((long)(buf[pos + 5] & 0xFF) << 40) | ((long)(buf[pos + 6] & 0xFF) << 48) | + ((long)(buf[pos + 7] & 0xFF) << 56); + h ^= fastHashMix(v); + h *= m; + } + + v = 0; + int pos2 = len8 * 8; + //CHECKSTYLE:OFF + switch (len & 7) { + case 7: + v ^= (long)(buf[pos2 + 6] & 0xFF) << 48; + // fall through + case 6: + v ^= (long)(buf[pos2 + 5] & 0xFF) << 40; + // fall through + case 5: + v ^= (long)(buf[pos2 + 4] & 0xFF) << 32; + // fall through + case 4: + v ^= (long)(buf[pos2 + 3] & 0xFF) << 24; + // fall through + case 3: + v ^= (long)(buf[pos2 + 2] & 0xFF) << 16; + // fall through + case 2: + v ^= (long)(buf[pos2 + 1] & 0xFF) << 8; + // fall through + case 1: + v ^= (buf[pos2] & 0xFF); + h ^= fastHashMix(v); + h *= m; + } + //CHECKSTYLE:ON + + return fastHashMix(h); + } + + + /** + * Compute 32-bit FastHash of the supplied data backed by byte array. + * + * FastHash is simple, robust, and efficient general-purpose hash function from Google. + * Implementation is adapted from https://code.google.com/archive/p/fast-hash/ + * + * @param buf the data to compute the hash + * @param seed seed to compute the hash + * @return computed 32-bit hash value + */ + public static int fastHash32(byte[] buf, int seed) { + // the following trick converts the 64-bit hashcode to Fermat + // residue, which shall retain information from both the higher + // and lower parts of hashcode. + long h = fastHash64(buf, seed); + return (int)(h - (h >>> 32)); + } + + // Compression function for Merkle-Damgard construction. + private static long fastHashMix(long h) { + h ^= h >>> 23; + h *= 0x2127599bf4325c37L; + h ^= h >>> 47; + return h; + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/HybridTimeUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/HybridTimeUtil.java new file mode 100644 index 0000000000..0672a62c7a --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/HybridTimeUtil.java @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import java.util.concurrent.TimeUnit; + +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Set of common utility methods to handle HybridTime and related timestamps. 
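+ * An HT timestamp packs the physical clock value, in microseconds, into the upper bits and a logical sequence counter into the lowest 12 bits.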
+ */ +@InterfaceAudience.Private +public class HybridTimeUtil { + + public static final int hybridTimeNumBitsToShift = 12; + public static final int hybridTimeLogicalBitsMask = (1 << hybridTimeNumBitsToShift) - 1; + + /** Non-constructable utility class. */ + private HybridTimeUtil() { + } + + /** + * Converts the provided timestamp, in the provided unit, to the HybridTime timestamp + * format. Logical bits are set to 0. + * + * @param timestamp the value of the timestamp, must be greater than 0 + * @param timeUnit the time unit of the timestamp + * @throws IllegalArgumentException if the timestamp is less than 0 + */ + public static long clockTimestampToHTTimestamp(long timestamp, TimeUnit timeUnit) { + if (timestamp < 0) { + throw new IllegalArgumentException("Timestamp cannot be less than 0"); + } + long timestampInMicros = TimeUnit.MICROSECONDS.convert(timestamp, timeUnit); + return timestampInMicros << hybridTimeNumBitsToShift; + } + + /** + * Extracts the physical and logical values from an HT timestamp. + * + * @param htTimestamp the encoded HT timestamp + * @return a pair of {physical, logical} long values in an array + */ + //CHECKSTYLE:OFF + public static long[] HTTimestampToPhysicalAndLogical(long htTimestamp) { + //CHECKSTYLE:ON + long timestampInMicros = htTimestamp >> hybridTimeNumBitsToShift; + long logicalValues = htTimestamp & hybridTimeLogicalBitsMask; + return new long[] {timestampInMicros, logicalValues}; + } + + /** + * Encodes separate physical and logical components into a single HT timestamp + * + * @param physical the physical component, in microseconds + * @param logical the logical component + * @return an encoded HT timestamp + */ + public static long physicalAndLogicalToHTTimestamp(long physical, long logical) { + return (physical << hybridTimeNumBitsToShift) + logical; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/LogThrottler.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/LogThrottler.java new file mode 100644 index 0000000000..00dc224520 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/LogThrottler.java @@ -0,0 +1,316 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import java.time.Instant; + +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; + +/** + * This class suppresses messages by not allowing more than one message per a number of seconds + * provided at the call-site of the logging functions. Each instance of this class is designed to + * throttle regardless of the message. 
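+ * For example, {@code throttler.warn(60, "failed to apply operation")} logs at most once every 60 seconds, no matter how often it is called.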
+ * TODO(mreddy): If functionality is ever expanded, use ConcurrentHashMap to store multiple messages + * and the last time it was logged, only one instance of LogThrottler will be needed per class + * as this would be used at multiple call-sites to throttle different messages + * TODO(mreddy): Use integer as hashing key rather than string for performance costs, store integers + * on file with call-sites, put onus on devs to provide integers for each unique message to throttle + * TODO(mreddy): Add count to keep track of how many messages have been suppressed + */ +@InterfaceAudience.Private +public class LogThrottler { + + private final Logger log; + private long lastLoggedTimeSecs = -1; + + public LogThrottler(Logger log) { + this.log = log; + } + + /** + * Throttles the log trace message 'msg' if the last message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param msg string message to be logged + */ + public void trace(long seconds, String msg) { + if (shouldLog(seconds)) { + log.trace(msg); + } + } + + /** + * Throttles the log trace message according to specified format and argument if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg argument for format string + */ + public void trace(long seconds, String format, Object arg) { + if (shouldLog(seconds)) { + log.trace(format, arg); + } + } + + /** + * Throttles the log trace message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg1 first argument for format string + * @param arg2 second argument for format string + */ + public void trace(long seconds, String format, Object arg1, Object arg2) { + if (shouldLog(seconds)) { + log.trace(format, arg1, arg2); + } + } + + /** + * Throttles the log trace message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arguments list of 3 or more arguments for format string + */ + public void trace(long seconds, String format, Object... 
arguments) { + if (shouldLog(seconds)) { + log.trace(format, arguments); + } + } + + /** + * Throttles the log warn message 'msg' if the last message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param msg string message to be logged + */ + public void warn(long seconds, String msg) { + if (shouldLog(seconds)) { + log.warn(msg); + } + } + + /** + * Throttles the log warn message according to specified format and argument if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg argument for format string + */ + public void warn(long seconds, String format, Object arg) { + if (shouldLog(seconds)) { + log.warn(format, arg); + } + } + + /** + * Throttles the log warn message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg1 first argument for format string + * @param arg2 second argument for format string + */ + public void warn(long seconds, String format, Object arg1, Object arg2) { + if (shouldLog(seconds)) { + log.warn(format, arg1, arg2); + } + } + + /** + * Throttles the log warn message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arguments list of 3 or more arguments for format string + */ + public void warn(long seconds, String format, Object... arguments) { + if (shouldLog(seconds)) { + log.warn(format, arguments); + } + } + + /** + * Throttles the log error message 'msg' if the last message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param msg string message to be logged + */ + public void error(long seconds, String msg) { + if (shouldLog(seconds)) { + log.error(msg); + } + } + + /** + * Throttles the log error message according to specified format and argument if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg argument for format string + */ + public void error(long seconds, String format, Object arg) { + if (shouldLog(seconds)) { + log.error(format, arg); + } + } + + /** + * Throttles the log error message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg1 first argument for format string + * @param arg2 second argument for format string + */ + public void error(long seconds, String format, Object arg1, Object arg2) { + if (shouldLog(seconds)) { + log.error(format, arg1, arg2); + } + } + + /** + * Throttles the log error message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arguments list of 3 or more arguments for format string + */ + public void error(long seconds, String format, Object... 
arguments) { + if (shouldLog(seconds)) { + log.error(format, arguments); + } + } + + /** + * Throttles the log info message 'msg' if the last message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param msg string message to be logged + */ + public void info(long seconds, String msg) { + if (shouldLog(seconds)) { + log.info(msg); + } + } + + /** + * Throttles the log info message according to specified format and argument if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg argument for format string + */ + public void info(long seconds, String format, Object arg) { + if (shouldLog(seconds)) { + log.info(format, arg); + } + } + + /** + * Throttles the log info message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg1 first argument for format string + * @param arg2 second argument for format string + */ + public void info(long seconds, String format, Object arg1, Object arg2) { + if (shouldLog(seconds)) { + log.info(format, arg1, arg2); + } + } + + /** + * Throttles the log info message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arguments list of 3 or more arguments for format string + */ + public void info(long seconds, String format, Object... arguments) { + if (shouldLog(seconds)) { + log.info(format, arguments); + } + } + + /** + * Throttles the log debug message 'msg' if the last message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param msg string message to be logged + */ + public void debug(long seconds, String msg) { + if (shouldLog(seconds)) { + log.debug(msg); + } + } + + /** + * Throttles the log debug message according to specified format and argument if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg argument for format string + */ + public void debug(long seconds, String format, Object arg) { + if (shouldLog(seconds)) { + log.debug(format, arg); + } + } + + /** + * Throttles the log debug message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arg1 first argument for format string + * @param arg2 second argument for format string + */ + public void debug(long seconds, String format, Object arg1, Object arg2) { + if (shouldLog(seconds)) { + log.debug(format, arg1, arg2); + } + } + + /** + * Throttles the log debug message according to specified format and arguments if the last + * message was logged less than 'seconds' ago + * @param seconds number of seconds between each desired log message + * @param format format string + * @param arguments list of 3 or more arguments for format string + */ + public void debug(long seconds, String format, Object... 
arguments) { + if (shouldLog(seconds)) { + log.debug(format, arguments); + } + } + + /** + * Returns true if first time logging message or it's been more than longer than the parameter + * duration in seconds indicating to call-site to log the message, returns false to let call-site + * know not to log the message + * @param throttlingIntervalSecs number of seconds between each desired log message + * @return boolean indicating whether or not to log + */ + private synchronized boolean shouldLog(long throttlingIntervalSecs) { + long nowSecs = Instant.now().getEpochSecond(); + if (lastLoggedTimeSecs == -1 || lastLoggedTimeSecs + throttlingIntervalSecs < nowSecs) { + lastLoggedTimeSecs = nowSecs; + return true; + } + return false; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/NetUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/NetUtil.java new file mode 100644 index 0000000000..5f40e49ba4 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/NetUtil.java @@ -0,0 +1,171 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.util.List; + +import com.google.common.base.Functions; +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.client.HostAndPort; + +/** + * Networking related methods. + */ +@InterfaceAudience.Private +public class NetUtil { + + private static final Logger LOG = LoggerFactory.getLogger(NetUtil.class); + + /** Non-constructable utility class. */ + private NetUtil() { + } + + /** + * Convert a list of {@link HostAndPort} objects to a comma separate string. + * The inverse of {@link #parseStrings(String, int)}. + * + * @param hostsAndPorts A list of {@link HostAndPort} objects. + * @return Comma separate list of "host:port" pairs. + */ + public static String hostsAndPortsToString(List hostsAndPorts) { + return Joiner.on(",").join(Lists.transform(hostsAndPorts, Functions.toStringFunction())); + } + + /** + * Parse a "host:port" pair into a {@link HostAndPort} object. + * If there is no port specified in the string, then 'defaultPort' is used. + * + * @param addrString A host or a "host:port" pair. + * @param defaultPort Default port to use if no port is specified in addrString. + * @return The HostAndPort object constructed from addrString. 
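+ * For example, {@code parseString("localhost", 7051)} yields localhost:7051, while {@code parseString("localhost:7150", 7051)} keeps the explicit port 7150.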
+ */ + public static HostAndPort parseString(String addrString, int defaultPort) { + // Use Guava's HostAndPort so we don't need to handle parsing ourselves. + com.google.common.net.HostAndPort hostAndPort = addrString.indexOf(':') == -1 ? + com.google.common.net.HostAndPort.fromParts(addrString, defaultPort) : + com.google.common.net.HostAndPort.fromString(addrString); + return new HostAndPort(hostAndPort.getHost(), hostAndPort.getPort()); + } + + /** + * Parse a comma separated list of "host:port" pairs into a list of + * {@link HostAndPort} objects. If no port is specified for an entry in + * the comma separated list, then a default port is used. + * The inverse of {@link #hostsAndPortsToString(List)}. + * + * @param commaSepAddrs The comma separated list of "host:port" pairs. + * @param defaultPort The default port to use if no port is specified. + * @return A list of HostAndPort objects constructed from commaSepAddrs. + */ + public static List parseStrings(final String commaSepAddrs, int defaultPort) { + Iterable addrStrings = Splitter.on(',').trimResults().split(commaSepAddrs); + List hostsAndPorts = Lists.newArrayListWithCapacity(Iterables.size(addrStrings)); + for (String addrString : addrStrings) { + HostAndPort hostAndPort = parseString(addrString, defaultPort); + hostsAndPorts.add(hostAndPort); + } + return hostsAndPorts; + } + + /** + * Gets a hostname or an IP address and returns an InetAddress. + *
+ * This method can block as there is no API for + * asynchronous DNS resolution in the JDK. + * @param host the hostname to resolve + * @return an InetAddress for the given hostname, + * or {@code null} if the address couldn't be resolved + */ + public static InetAddress getInetAddress(final String host) { + InetAddress[] addrs = getAllInetAddresses(host); + if (addrs != null && addrs.length > 0) { + return addrs[0]; + } + return null; + } + + /** + * Gets a hostname or an IP address and returns an array of InetAddresses. + *
+ * This method can block for a long time if DNS resolution + * is slow. + * @param host the hostname to resolve + * @return an array of InetAddresses for the given hostname, + * or {@code null} if the address couldn't be resolved + */ + public static InetAddress[] getAllInetAddresses(final String host) { + // The 'slow DNS resolution' warning threshold is set to be the same as + // in HostPort::ResolveAddresses() from src/kudu/util/net/net_util.cc. + final long kWarningThresholdNs = 200000000; // 200 ms + + // Once a DNS name is resolved into IP addresses, DNS caching layers of + // a contemporary OS makes follow-up resolutions faster. However, when + // investigating latencies of relatively fast RPC calls, make it possible + // to see in debug logs the exact timing of DNS resolutions that took + // over one millisecond. + final long kDebugThresholdNs = 1000000; // 1 ms + + final long start = System.nanoTime(); + try { + InetAddress[] ipAddrs = InetAddress.getAllByName(host); + final long elapsedNs = System.nanoTime() - start; + + if (elapsedNs > kDebugThresholdNs && LOG.isDebugEnabled()) { + LOG.debug("Resolved '{}' into {} in {}ns", host, ipAddrs, elapsedNs); + } else if (elapsedNs > kWarningThresholdNs) { + LOG.warn("Slow DNS lookup! Resolved '{}' into {} in {}ns", host, ipAddrs, elapsedNs); + } + return ipAddrs; + } catch (UnknownHostException e) { + LOG.error("Failed resolving '{}' into IP addresses in {}ns", host, System.nanoTime() - start); + return null; + } + } + + /** + * Given an InetAddress, checks to see if the address is a local address, by + * comparing the address with all the interfaces on the node. + * @param addr address to check if it is local node's address + * @return true if the address corresponds to the local node + */ + public static boolean isLocalAddress(InetAddress addr) { + // Check if the address is any local or loopback. + boolean local = addr.isAnyLocalAddress() || addr.isLoopbackAddress(); + + // Check if the address is defined on any interface. + if (!local) { + try { + local = NetworkInterface.getByInetAddress(addr) != null; + } catch (SocketException e) { + // Pass. + } + } + return local; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Pair.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Pair.java new file mode 100644 index 0000000000..5079c953a0 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Pair.java @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.util; + +import com.google.common.base.Objects; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class Pair { + private final A first; + private final B second; + + public Pair(A first, B second) { + this.first = first; + this.second = second; + } + + public A getFirst() { + return first; + } + + public B getSecond() { + return second; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof Pair)) { + return false; + } + + Pair pair = (Pair) o; + + if (first != null ? !first.equals(pair.first) : pair.first != null) { + return false; + } + if (second != null ? !second.equals(pair.second) : pair.second != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + return Objects.hashCode(first, second); + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java new file mode 100644 index 0000000000..4b4b1c79c4 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java @@ -0,0 +1,469 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import static org.apache.kudu.util.DataGenerator.randomBinary; +import static org.apache.kudu.util.DataGenerator.randomDate; +import static org.apache.kudu.util.DataGenerator.randomDecimal; +import static org.apache.kudu.util.DataGenerator.randomString; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder; +import org.apache.kudu.ColumnSchema.CompressionAlgorithm; +import org.apache.kudu.ColumnSchema.Encoding; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.CreateTableOptions; +import org.apache.kudu.client.PartialRow; + +/** + * A utility class to generate random schemas and schema components. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class SchemaGenerator { + + // TODO(ghenke): Make string and binary length configurable. 
+ private static final int DEFAULT_BINARY_LENGTH = 128; + private static final int MIN_HASH_BUCKETS = 2; + + private final Random random; + private final int columnCount; + private final int keyColumnCount; + private final List types; + private final List keyTypes; + private final List encodings; + private final List compressions; + private final List blockSizes; + private final Float defaultRate; + private final int minPrecision; + private final int maxPrecision; + + private SchemaGenerator(final Random random, + final int columnCount, + final int keyColumnCount, + final List types, + final List keyTypes, + final List encodings, + final List compressions, + final List blockSizes, + final Float defaultRate, + final int minPrecision, + final int maxPrecision) { + this.random = random; + this.columnCount = columnCount; + this.keyColumnCount = keyColumnCount; + this.types = types; + this.keyTypes = keyTypes; + this.encodings = encodings; + this.compressions = compressions; + this.blockSizes = blockSizes; + this.defaultRate = defaultRate; + this.minPrecision = minPrecision; + this.maxPrecision = maxPrecision; + } + + /** + * Generates a random Schema. + * @return a random Schema. + */ + public Schema randomSchema() { + List columns = new ArrayList<>(); + for (int i = 0; i < columnCount; i++) { + boolean key = i < keyColumnCount; + Type colType = randomType(key); + String colName = colType.getName() + i; + ColumnSchema column = randomColumnSchema(colName, colType, key); + columns.add(column); + } + return new Schema(columns); + } + + /** + * Generates a random ColumnSchema. + * @return a random ColumnSchema. + */ + public ColumnSchema randomColumnSchema(String name, Type type, boolean key) { + final ColumnSchemaBuilder builder = new ColumnSchemaBuilder(name, type) + .key(key) + // TODO(ghenke): Make nullable columns configurable. + .nullable(random.nextBoolean() && !key) + .compressionAlgorithm(randomCompression()) + .desiredBlockSize(randomBlockSize()) + .encoding(randomEncoding(type)) + .comment("A " + type.getName() + " column for " + name); + + ColumnTypeAttributes typeAttributes = null; + if (type == Type.DECIMAL) { + int precision = random.nextInt((maxPrecision - minPrecision) + 1) + minPrecision; + // TODO(ghenke): Make scale configurable. + int scale = random.nextInt(precision); + typeAttributes = DecimalUtil.typeAttributes(precision, scale); + builder.typeAttributes(typeAttributes); + } + + if (type == Type.VARCHAR) { + int length = random.nextInt( + (CharUtil.MAX_VARCHAR_LENGTH - CharUtil.MIN_VARCHAR_LENGTH) + 1) + + CharUtil.MIN_VARCHAR_LENGTH; + typeAttributes = CharUtil.typeAttributes(length); + builder.typeAttributes(typeAttributes); + } + + // Sometimes set a column default value. 
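+ // defaultRate is the probability that a randomly generated column declares an explicit default value.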
+ if (random.nextFloat() <= defaultRate) { + switch (type) { + case BOOL: + builder.defaultValue(random.nextBoolean()); + break; + case INT8: + builder.defaultValue((byte)random.nextInt()); + break; + case INT16: + builder.defaultValue((short)random.nextInt()); + break; + case INT32: + builder.defaultValue(random.nextInt()); + break; + case DATE: + builder.defaultValue(randomDate(random)); + break; + case INT64: + case UNIXTIME_MICROS: + builder.defaultValue(random.nextLong()); + break; + case FLOAT: + builder.defaultValue(random.nextFloat()); + break; + case DOUBLE: + builder.defaultValue(random.nextDouble()); + break; + case DECIMAL: + builder.defaultValue(randomDecimal(typeAttributes, random)); + break; + case VARCHAR: + builder.defaultValue(randomString(Math.min(DEFAULT_BINARY_LENGTH, + typeAttributes.getLength()), + random)); + break; + case STRING: + builder.defaultValue(randomString(DEFAULT_BINARY_LENGTH, random)); + break; + case BINARY: + builder.defaultValue(randomBinary(DEFAULT_BINARY_LENGTH, random)); + break; + default: + throw new UnsupportedOperationException("Unsupported type " + type); + } + } + return builder.build(); + } + + public int randomBlockSize() { + return blockSizes.get(random.nextInt(blockSizes.size())); + } + + public CompressionAlgorithm randomCompression() { + return compressions.get(random.nextInt(compressions.size())); + } + + public Type randomType(boolean key) { + if (key) { + return keyTypes.get(random.nextInt(keyTypes.size())); + } else { + return types.get(random.nextInt(types.size())); + } + } + + public Encoding randomEncoding(Type type) { + final List validEncodings = new ArrayList<>(encodings); + // Remove the unsupported encodings for the type. + switch (type) { + case INT8: + case INT16: + case INT32: + case INT64: + case DATE: + case UNIXTIME_MICROS: + validEncodings.retainAll(Arrays.asList( + Encoding.AUTO_ENCODING, + Encoding.PLAIN_ENCODING, + Encoding.BIT_SHUFFLE, + Encoding.RLE)); + break; + case FLOAT: + case DOUBLE: + case DECIMAL: + validEncodings.retainAll(Arrays.asList( + Encoding.AUTO_ENCODING, + Encoding.PLAIN_ENCODING, + Encoding.BIT_SHUFFLE)); + break; + case VARCHAR: + case STRING: + case BINARY: + validEncodings.retainAll(Arrays.asList( + Encoding.AUTO_ENCODING, + Encoding.PLAIN_ENCODING, + Encoding.PREFIX_ENCODING, + Encoding.DICT_ENCODING)); + break; + case BOOL: + validEncodings.retainAll(Arrays.asList( + Encoding.AUTO_ENCODING, + Encoding.PLAIN_ENCODING, + Encoding.RLE)); + break; + default: throw new IllegalArgumentException("Unsupported type " + type); + } + + if (validEncodings.isEmpty()) { + throw new IllegalArgumentException("There are no valid encodings for type " + type); + } + + return validEncodings.get(random.nextInt(validEncodings.size())); + } + + public CreateTableOptions randomCreateTableOptions(Schema schema) { + CreateTableOptions options = new CreateTableOptions(); + final List keyColumns = schema.getPrimaryKeyColumns(); + + // Add hash partitioning (Max out at 3 levels to avoid being excessive). + int hashPartitionLevels = random.nextInt(Math.min(keyColumns.size(), 2)) + 1; + for (int i = 0; i < hashPartitionLevels; i++) { + final ColumnSchema hashColumn = keyColumns.get(i); + // TODO(ghenke): Make buckets configurable. 
+ final int hashBuckets = random.nextInt(2) + MIN_HASH_BUCKETS; + final int hashSeed = random.nextInt(); + options.addHashPartitions(Arrays.asList(hashColumn.getName()), hashBuckets, hashSeed); + } + + boolean hasRangePartition = random.nextBoolean(); + ColumnSchema int64Key = null; + for (ColumnSchema col : keyColumns) { + if (col.getType() == Type.INT64) { + int64Key = col; + break; + } + } + // TODO(ghenke): Configurable range partition rate and more supported types. + if (hasRangePartition && int64Key != null) { + options.setRangePartitionColumns(Arrays.asList(int64Key.getName())); + int splits = random.nextInt(8); // TODO(ghenke): Configurable splits. + List used = new ArrayList<>(); + int i = 0; + while (i < splits) { + PartialRow split = schema.newPartialRow(); + long value = random.nextLong(); + if (!used.contains(value)) { + used.add(value); + split.addLong(int64Key.getName(), random.nextLong()); + i++; + } + } + } + + options.setOwner("random-" + random.nextInt(1000000)); + options.setComment("Table schema generated by SchemaGenerator"); + return options; + } + + /** + * A builder to configure and construct a SchemaGeneratorBuilder instance. + */ + public static class SchemaGeneratorBuilder { + + private Random random = new Random(System.currentTimeMillis()); + + private int columnCount = 10; + private int keyColumnCount = 1; + private List types = Arrays.asList(Type.values()); + private List encodings = new ArrayList<>(); + private List compressions = new ArrayList<>(); + // Default, min, middle, max. + private List blockSizes = Arrays.asList(0, 4096, 524288, 1048576); + private float defaultRate = 0.25f; + private int minPrecision = DecimalUtil.MIN_DECIMAL_PRECISION; + private int maxPrecision = DecimalUtil.MAX_DECIMAL_PRECISION; + + public SchemaGeneratorBuilder() { + // Add all encoding options and remove any invalid ones. + encodings.addAll(Arrays.asList(Encoding.values())); + encodings.remove(Encoding.UNKNOWN); + // Add all compression options and remove any invalid ones. + compressions.addAll(Arrays.asList(CompressionAlgorithm.values())); + compressions.remove(CompressionAlgorithm.UNKNOWN); + } + + /** + * Define a custom Random instance to use for any random generation. + * @return this instance + */ + public SchemaGeneratorBuilder random(Random random) { + this.random = random; + return this; + } + + /** + * Define the column count of a random schema. + * @return this instance + */ + public SchemaGeneratorBuilder columnCount(int columnCount) { + Preconditions.checkArgument(columnCount > 0, + "columnCount must be greater than 0"); + this.columnCount = columnCount; + return this; + } + + /** + * Define the key column count of a random schema. + * @return this instance + */ + public SchemaGeneratorBuilder keyColumnCount(int keyColumnCount) { + Preconditions.checkArgument(columnCount > 0, + "keyColumnCount must be greater than 0"); + this.keyColumnCount = keyColumnCount; + return this; + } + + /** + * Define the types that can be used when randomly generating a column schema. + * @return this instance + */ + public SchemaGeneratorBuilder types(Type... types) { + this.types = Arrays.asList(types); + return this; + } + + /** + * Define the types that can *not* be used when randomly generating a column schema. + * @return this instance + */ + public SchemaGeneratorBuilder excludeTypes(Type... types) { + List includedTypes = new ArrayList<>(); + // Add all possible types. + includedTypes.addAll(Arrays.asList(Type.values())); + // Remove the excluded types. 
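+      // Note: 'types' below refers to the vararg parameter (the types to
+      // exclude), not the builder's 'types' field, which is reassigned after
+      // the loop.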
+ for (Type type : types) { + includedTypes.remove(type); + } + this.types = includedTypes; + return this; + } + + /** + * Define the encoding options that can be used when randomly generating + * a column schema. + * @return this instance + */ + public SchemaGeneratorBuilder encodings(Encoding... encodings) { + this.encodings = Arrays.asList(encodings); + return this; + } + + /** + * Define the compression options that can be used when randomly generating + * a column schema. + * @return this instance + */ + public SchemaGeneratorBuilder compressions(CompressionAlgorithm... compressions) { + this.compressions = Arrays.asList(compressions); + return this; + } + + /** + * Define the rate at which default values should be used when randomly generating + * a column schema. + * @return this instance + */ + public SchemaGeneratorBuilder defaultRate(float defaultRate) { + Preconditions.checkArgument(defaultRate >= 0f && defaultRate <= 1f, + "defaultRate must be between 0 and 1"); + this.defaultRate = defaultRate; + return this; + } + + /** + * Define the precision value to use when when randomly generating + * a column schema with a Decimal type. + * @return this instance + */ + public SchemaGeneratorBuilder precision(int precision) { + return precisionRange(precision, precision); + } + + /** + * Define the range of precision values to use when when randomly generating + * a column schema with a Decimal type. + * @return this instance + */ + public SchemaGeneratorBuilder precisionRange(int minPrecision, int maxPrecision) { + Preconditions.checkArgument(minPrecision >= DecimalUtil.MIN_DECIMAL_PRECISION, + "minPrecision must be greater than or equal to " + + DecimalUtil.MIN_DECIMAL_PRECISION); + Preconditions.checkArgument(maxPrecision <= DecimalUtil.MAX_DECIMAL_PRECISION, + "maxPrecision must be less than or equal to " + + DecimalUtil.MAX_DECIMAL_PRECISION); + Preconditions.checkArgument(minPrecision <= maxPrecision, + "minPrecision must be less than or equal to " + maxPrecision); + this.minPrecision = minPrecision; + this.maxPrecision = maxPrecision; + return this; + } + + public SchemaGenerator build() { + Preconditions.checkArgument(keyColumnCount <= columnCount, + "keyColumnCount must be less than or equal to the columnCount"); + + // Filter the types that are compatible for key columns. + List keyTypes = new ArrayList<>(types); + keyTypes.removeAll(Arrays.asList(Type.BOOL, Type.FLOAT, Type.DOUBLE)); + Preconditions.checkArgument(!keyTypes.isEmpty(), + "At least one type must be supported for key columns"); + + return new SchemaGenerator( + random, + columnCount, + keyColumnCount, + types, + keyTypes, + encodings, + compressions, + blockSizes, + defaultRate, + minPrecision, + maxPrecision + ); + } + } + + +} + + + diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/SecurityUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/SecurityUtil.java new file mode 100644 index 0000000000..11e2cfcf8a --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/SecurityUtil.java @@ -0,0 +1,242 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import java.security.MessageDigest; +import java.security.cert.Certificate; +import java.security.cert.X509Certificate; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; +import javax.security.auth.Subject; +import javax.security.auth.kerberos.KerberosPrincipal; +import javax.security.auth.kerberos.KerberosTicket; +import javax.security.auth.login.AppConfigurationEntry; +import javax.security.auth.login.Configuration; +import javax.security.auth.login.LoginContext; +import javax.security.auth.login.LoginException; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@InterfaceAudience.Private +public abstract class SecurityUtil { + private static final Logger LOG = LoggerFactory.getLogger(SecurityUtil.class); + public static final String KUDU_TICKETCACHE_PROPERTY = "kudu.krb5ccname"; + + /** + * Map from the names of digest algorithms used in X509 certificates to + * the appropriate MessageDigest implementation to use for channel-bindings. + */ + private static final ImmutableMap CERT_DIGEST_TO_MESSAGE_DIGEST = + ImmutableMap.builder() + // RFC 5929: if the certificate's signatureAlgorithm uses a single hash + // function, and that hash function is either MD5 [RFC1321] or SHA-1 + // [RFC3174], then use SHA-256 [FIPS-180-3]; + .put("MD5", "SHA-256") + .put("SHA1", "SHA-256") + // For other algorithms, use the provided hash function. + .put("SHA224", "SHA-224") + .put("SHA256", "SHA-256") + .put("SHA384", "SHA-384") + .put("SHA512", "SHA-512") + // The above list is exhaustive as of JDK8's implementation of + // SignatureAndHashAlgorithm. + .build(); + + /** + * If we have Kerberos credentials that are within this specified window + * of expiration, then refresh them. + */ + private static final long REFRESH_BEFORE_EXPIRATION_SECS = 10; + + /** + * Return the Subject associated with the current thread's AccessController, + * if that subject has Kerberos credentials. If there is no such subject, or + * the subject has no Kerberos credentials, a new subject is logged in from + * the currently configured TicketCache. + */ + @Nullable + public static Subject getSubjectFromTicketCacheOrNull() { + // If there isn't any current subject with krb5 principals, try to login + // using the ticket cache. + Configuration conf = new Configuration() { + @Override + public AppConfigurationEntry[] getAppConfigurationEntry(String name) { + Map options = new HashMap<>(); + + // TODO: should we offer some kind of "renewal thread" or + // "reacquire from keytab thread" like Hadoop does? + options.put("useTicketCache", "true"); + options.put("doNotPrompt", "true"); + options.put("refreshKrb5Config", "true"); + + // Allow configuring debug by a system property. 
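+        // For example, starting the client JVM with -Dkudu.jaas.debug=true
+        // enables Krb5LoginModule debug output.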
+ options.put("debug", Boolean.toString(Boolean.getBoolean("kudu.jaas.debug"))); + + // Look for the ticket cache specified in one of the following ways: + // 1) in a Kudu-specific system property (this is convenient for testing) + // 2) in the KRB5CCNAME environment variable + // 3) the Java default (by not setting any value) + String ticketCache = System.getProperty(KUDU_TICKETCACHE_PROPERTY, + System.getenv("KRB5CCNAME")); + if (ticketCache != null) { + LOG.debug("Using ticketCache: {}", ticketCache); + options.put("ticketCache", ticketCache); + } + options.put("renewTGT", "true"); + + return new AppConfigurationEntry[] { new AppConfigurationEntry( + "com.sun.security.auth.module.Krb5LoginModule", + AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, options) }; + } + }; + try { + LoginContext loginContext = new LoginContext("kudu", new Subject(), null, conf); + loginContext.login(); + Subject subject = loginContext.getSubject(); + LOG.debug("Logged in as subject: {}", Joiner.on(",").join(subject.getPrincipals())); + return subject; + } catch (LoginException e) { + LOG.debug("Could not login via JAAS. Using no credentials: " + e.getMessage(), + LOG.isTraceEnabled() ? e : null); + return null; + } + } + + /** + * Compute the "tls-server-endpoint" channel binding data for the given X509 + * certificate. The algorithm is specified in RFC 5929. + * + * @return the expected channel bindings for the certificate + * @throws RuntimeException if the certificate is not an X509 cert, or if + * it uses a signature type for which we cannot compute channel bindings + */ + public static byte[] getEndpointChannelBindings(Certificate cert) { + Preconditions.checkArgument(cert instanceof X509Certificate, + "can only handle X509 certs"); + X509Certificate x509 = (X509Certificate)cert; + String sigAlg = x509.getSigAlgName(); + String sigAlgUpper = sigAlg.toUpperCase(Locale.ENGLISH); + + // The signature algorithm name is a string like 'SHA256withRSA'. + // There's no API available to actually find just the digest algorithm, + // so we resort to some hackery. + String[] components = sigAlgUpper.split("WITH", 2); + String digestAlg = CERT_DIGEST_TO_MESSAGE_DIGEST.get(components[0]); + if (digestAlg == null) { + // RFC 5929: if the certificate's signatureAlgorithm uses no hash functions or + // uses multiple hash functions, then this channel binding type's channel + // bindings are undefined at this time (updates to is channel binding type may + // occur to address this issue if it ever arises). + throw new RuntimeException("cert uses unknown signature algorithm: " + sigAlg); + } + try { + return MessageDigest.getInstance(digestAlg).digest(cert.getEncoded()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * @return true if 'subject' contains a Kerberos TGT that is about to expire, or + * if it contains no TGT at all. + */ + public static boolean needsRefresh(Subject subject) { + long deadline = System.currentTimeMillis() + REFRESH_BEFORE_EXPIRATION_SECS * 1000; + return tgtExpiresBefore(subject, deadline); + } + + /** + * @return true if 'subject' contains a Kerberos TGT that is expired, or if it contains + * no TGT at all. 
+ */ + public static boolean isTgtExpired(Subject subject) { + return tgtExpiresBefore(subject, System.currentTimeMillis()); + } + + private static boolean tgtExpiresBefore(Subject subject, long deadlineMillis) { + KerberosTicket tgt = findTgt(subject); + if (tgt != null) { + return tgt.getEndTime().getTime() < deadlineMillis; + } + // We didn't find any TGT. This likely means that it expired and got + // removed during a connection attempt. So, we need to get a new one. + return true; + } + + private static KerberosTicket findTgt(Subject subject) { + Set tickets = subject.getPrivateCredentials(KerberosTicket.class); + // tickets is a Collections.synchronizedSet() wrapper, so we need to synchronize + // on it to iterate it. + synchronized (tickets) { + for (KerberosTicket ticket : tickets) { + if (SecurityUtil.isTGSPrincipal(ticket.getServer())) { + return ticket; + } + } + } + return null; + } + + /** + * @return true if 'principal' matches the expected pattern for a TGT + */ + private static boolean isTGSPrincipal(KerberosPrincipal principal) { + // When a principal foo@BAR authenticates to realm BAR, it will get a service + // ticket with the service principal 'krbtgt/BAR@BAR'. Note that this is the + // case even when the credentials will be used to authenticate to a remote + // realm using cross-realm trust. + // + // For example, if the user alice@AD.CORP is connecting to a Kudu service + // kudu/host@CLUSTER.LOCAL, the ticket cache will contain the following + // tickets: + // + // krbtgt/AD.CORP@AD.CORP + // krbtgt/CLUSTER.LOCAL@AD.CORP (cross-realm trust ticket) + // kudu/host@CLUSTER.LOCAL (service in remote realm) + // + // Here we are simply trying to identify the first of those tickets. + return principal != null && principal.getName().equals( + "krbtgt/" + principal.getRealm() + "@" + principal.getRealm()); + } + + /** + * @return the KerberosPrincipal object associated with the given Subject. + * If there is no Principal, returns null. If there is more than one principal + * (not expected), logs a warning and also returns null. + */ + public static KerberosPrincipal getKerberosPrincipalOrNull(Subject newSubject) { + Set principals = newSubject.getPrincipals(KerberosPrincipal.class); + if (principals.size() > 1) { + LOG.warn("JAAS Subject unexpectedly includes more than one principal: {}", + Joiner.on(", ").join(principals)); + return null; + } else if (principals.isEmpty()) { + return null; + } + + return principals.iterator().next(); + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Slice.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Slice.java new file mode 100644 index 0000000000..60a3f38e5f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Slice.java @@ -0,0 +1,659 @@ +/* + * Copyright 2009 Red Hat, Inc. + * + * Red Hat licenses this file to you under the Apache License, version 2.0 + * (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * Copyright 2011 Dain Sundstrom + * Copyright 2011 FuseSource Corp. http://fusesource.com + */ + +package org.apache.kudu.util; + +import static java.nio.ByteOrder.LITTLE_ENDIAN; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.FileChannel; +import java.nio.channels.GatheringByteChannel; +import java.nio.channels.ScatteringByteChannel; +import java.nio.charset.Charset; +import java.util.Arrays; + +import com.google.common.base.Preconditions; +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import com.google.common.primitives.Shorts; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Little Endian slice of a byte array. + * + * The slice holds on to a reference of the underlying byte array meaning it + * cannot be garbage collected until the Slice itself can be garbage collected. + */ +@InterfaceAudience.Private +public final class Slice implements Comparable { + private final byte[] data; + private final int offset; + private final int length; + + private int hash; + + public Slice(int length) { + data = new byte[length]; + this.offset = 0; + this.length = length; + } + + public Slice(byte[] data) { + Preconditions.checkNotNull(data, "array is null"); + this.data = data; + this.offset = 0; + this.length = data.length; + } + + public Slice(byte[] data, int offset, int length) { + Preconditions.checkNotNull(data, "array is null"); + this.data = data; + this.offset = offset; + this.length = length; + } + + /** + * Length of this slice. + */ + public int length() { + return length; + } + + /** + * Gets the array underlying this slice. + */ + public byte[] getRawArray() { + return data; + } + + /** + * Gets the offset of this slice in the underlying array. + */ + public int getRawOffset() { + return offset; + } + + /** + * Gets a byte at the specified absolute {@code index} in this buffer. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 1} is greater than {@code this.capacity} + */ + public byte getByte(int index) { + Preconditions.checkPositionIndexes(index, index + 1, this.length); + index += offset; + return data[index]; + } + + /** + * Gets an unsigned byte at the specified absolute {@code index} in this + * buffer. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 1} is greater than {@code this.capacity} + */ + public short getUnsignedByte(int index) { + return (short) (getByte(index) & 0xFF); + } + + /** + * Gets a 16-bit short integer at the specified absolute {@code index} in + * this slice. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 2} is greater than {@code this.capacity} + */ + public short getShort(int index) { + Preconditions.checkPositionIndexes(index, index + Shorts.BYTES, this.length); + index += offset; + return (short) ((data[index] & 0xFF) | (data[index + 1] << 8)); + } + + /** + * Gets a 32-bit integer at the specified absolute {@code index} in + * this buffer. 
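+   * <p>Values are decoded little-endian; for example, the bytes
+   * {@code 01 00 00 00} decode to {@code 1}.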
+ * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 4} is greater than {@code this.capacity} + */ + public int getInt(int index) { + Preconditions.checkPositionIndexes(index, index + Ints.BYTES, this.length); + index += offset; + return (data[index] & 0xff) | + (data[index + 1] & 0xff) << 8 | + (data[index + 2] & 0xff) << 16 | + (data[index + 3] & 0xff) << 24; + } + + /** + * Gets a 64-bit long integer at the specified absolute {@code index} in + * this buffer. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 8} is greater than {@code this.capacity} + */ + public long getLong(int index) { + Preconditions.checkPositionIndexes(index, index + Longs.BYTES, this.length); + index += offset; + return ((long) data[index] & 0xff) | + ((long) data[index + 1] & 0xff) << 8 | + ((long) data[index + 2] & 0xff) << 16 | + ((long) data[index + 3] & 0xff) << 24 | + ((long) data[index + 4] & 0xff) << 32 | + ((long) data[index + 5] & 0xff) << 40 | + ((long) data[index + 6] & 0xff) << 48 | + ((long) data[index + 7] & 0xff) << 56; + } + + /** + * Transfers this buffer's data to the specified destination starting at + * the specified absolute {@code index}. + * + * @param dstIndex the first index of the destination + * @param length the number of bytes to transfer + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0}, + * if the specified {@code dstIndex} is less than {@code 0}, + * if {@code index + length} is greater than + * {@code this.capacity}, or + * if {@code dstIndex + length} is greater than + * {@code dst.capacity} + */ + public void getBytes(int index, Slice dst, int dstIndex, int length) { + getBytes(index, dst.data, dstIndex, length); + } + + /** + * Transfers this buffer's data to the specified destination starting at + * the specified absolute {@code index}. + * + * @param destinationIndex the first index of the destination + * @param length the number of bytes to transfer + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0}, + * if the specified {@code dstIndex} is less than {@code 0}, + * if {@code index + length} is greater than + * {@code this.capacity}, or + * if {@code dstIndex + length} is greater than + * {@code dst.length} + */ + public void getBytes(int index, byte[] destination, int destinationIndex, int length) { + Preconditions.checkPositionIndexes(index, index + length, this.length); + Preconditions.checkPositionIndexes(destinationIndex, + destinationIndex + length, destination.length); + index += offset; + System.arraycopy(data, index, destination, destinationIndex, length); + } + + public byte[] getBytes() { + return getBytes(0, length); + } + + public byte[] getBytes(int index, int length) { + index += offset; + if (index == 0) { + return Arrays.copyOf(data, length); + } else { + byte[] value = new byte[length]; + System.arraycopy(data, index, value, 0, length); + return value; + } + } + + /** + * Transfers this buffer's data to the specified destination starting at + * the specified absolute {@code index} until the destination's position + * reaches its limit. 
+ * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * if {@code index + dst.remaining()} is greater than + * {@code this.capacity} + */ + public void getBytes(int index, ByteBuffer destination) { + Preconditions.checkPositionIndex(index, this.length); + index += offset; + destination.put(data, index, Math.min(length, destination.remaining())); + } + + /** + * Transfers this buffer's data to the specified stream starting at the + * specified absolute {@code index}. + * + * @param length the number of bytes to transfer + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * if {@code index + length} is greater than + * {@code this.capacity} + * @throws java.io.IOException if the specified stream threw an exception during I/O + */ + public void getBytes(int index, OutputStream out, int length) + throws IOException { + Preconditions.checkPositionIndexes(index, index + length, this.length); + index += offset; + out.write(data, index, length); + } + + /** + * Transfers this buffer's data to the specified channel starting at the + * specified absolute {@code index}. + * + * @param length the maximum number of bytes to transfer + * @return the actual number of bytes written out to the specified channel + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * if {@code index + length} is greater than + * {@code this.capacity} + * @throws java.io.IOException if the specified channel threw an exception during I/O + */ + public int getBytes(int index, GatheringByteChannel out, int length) + throws IOException { + Preconditions.checkPositionIndexes(index, index + length, this.length); + index += offset; + return out.write(ByteBuffer.wrap(data, index, length)); + } + + /** + * Sets the specified 16-bit short integer at the specified absolute + * {@code index} in this buffer. The 16 high-order bits of the specified + * value are ignored. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 2} is greater than {@code this.capacity} + */ + public void setShort(int index, int value) { + Preconditions.checkPositionIndexes(index, index + Shorts.BYTES, this.length); + index += offset; + data[index] = (byte) value; + data[index + 1] = (byte) (value >>> 8); + } + + /** + * Sets the specified 32-bit integer at the specified absolute + * {@code index} in this buffer. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 4} is greater than {@code this.capacity} + */ + public void setInt(int index, int value) { + Preconditions.checkPositionIndexes(index, index + Ints.BYTES, this.length); + index += offset; + data[index] = (byte) value; + data[index + 1] = (byte) (value >>> 8); + data[index + 2] = (byte) (value >>> 16); + data[index + 3] = (byte) (value >>> 24); + } + + /** + * Sets the specified 64-bit long integer at the specified absolute + * {@code index} in this buffer. 
+ * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 8} is greater than {@code this.capacity} + */ + public void setLong(int index, long value) { + Preconditions.checkPositionIndexes(index, index + Longs.BYTES, this.length); + index += offset; + data[index] = (byte) value; + data[index + 1] = (byte) (value >>> 8); + data[index + 2] = (byte) (value >>> 16); + data[index + 3] = (byte) (value >>> 24); + data[index + 4] = (byte) (value >>> 32); + data[index + 5] = (byte) (value >>> 40); + data[index + 6] = (byte) (value >>> 48); + data[index + 7] = (byte) (value >>> 56); + } + + /** + * Sets the specified byte at the specified absolute {@code index} in this + * buffer. The 24 high-order bits of the specified value are ignored. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * {@code index + 1} is greater than {@code this.capacity} + */ + public void setByte(int index, int value) { + Preconditions.checkPositionIndexes(index, index + 1, this.length); + index += offset; + data[index] = (byte) value; + } + + /** + * Transfers the specified source buffer's data to this buffer starting at + * the specified absolute {@code index}. + * + * @param srcIndex the first index of the source + * @param length the number of bytes to transfer + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0}, + * if the specified {@code srcIndex} is less than {@code 0}, + * if {@code index + length} is greater than + * {@code this.capacity}, or + * if {@code srcIndex + length} is greater than + * {@code src.capacity} + */ + public void setBytes(int index, Slice src, int srcIndex, int length) { + setBytes(index, src.data, src.offset + srcIndex, length); + } + + /** + * Transfers the specified source array's data to this buffer starting at + * the specified absolute {@code index}. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0}, + * if the specified {@code srcIndex} is less than {@code 0}, + * if {@code index + length} is greater than + * {@code this.capacity}, or + * if {@code srcIndex + length} is greater than {@code src.length} + */ + public void setBytes(int index, byte[] source, int sourceIndex, int length) { + Preconditions.checkPositionIndexes(index, index + length, this.length); + Preconditions.checkPositionIndexes(sourceIndex, sourceIndex + length, source.length); + index += offset; + System.arraycopy(source, sourceIndex, data, index, length); + } + + /** + * Transfers the specified source buffer's data to this buffer starting at + * the specified absolute {@code index} until the source buffer's position + * reaches its limit. + * + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * if {@code index + src.remaining()} is greater than + * {@code this.capacity} + */ + public void setBytes(int index, ByteBuffer source) { + Preconditions.checkPositionIndexes(index, index + source.remaining(), this.length); + index += offset; + source.get(data, index, source.remaining()); + } + + /** + * Transfers the content of the specified source stream to this buffer + * starting at the specified absolute {@code index}. + * + * @param length the number of bytes to transfer + * @return the actual number of bytes read in from the specified channel. + * {@code -1} if the specified channel is closed. 
+ * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * if {@code index + length} is greater than {@code this.capacity} + * @throws java.io.IOException if the specified stream threw an exception during I/O + */ + public int setBytes(int index, InputStream in, int length) + throws IOException { + Preconditions.checkPositionIndexes(index, index + length, this.length); + index += offset; + int readBytes = 0; + do { + int localReadBytes = in.read(data, index, length); + if (localReadBytes < 0) { + if (readBytes == 0) { + return -1; + } else { + break; + } + } + readBytes += localReadBytes; + index += localReadBytes; + length -= localReadBytes; + } while (length > 0); + + return readBytes; + } + + /** + * Transfers the content of the specified source channel to this buffer + * starting at the specified absolute {@code index}. + * + * @param length the maximum number of bytes to transfer + * @return the actual number of bytes read in from the specified channel. + * {@code -1} if the specified channel is closed. + * @throws IndexOutOfBoundsException if the specified {@code index} is less than {@code 0} or + * if {@code index + length} is greater than {@code this.capacity} + * @throws java.io.IOException if the specified channel threw an exception during I/O + */ + public int setBytes(int index, ScatteringByteChannel in, int length) + throws IOException { + Preconditions.checkPositionIndexes(index, index + length, this.length); + index += offset; + ByteBuffer buf = ByteBuffer.wrap(data, index, length); + int readBytes = 0; + + do { + int localReadBytes; + try { + localReadBytes = in.read(buf); + } catch (ClosedChannelException e) { + localReadBytes = -1; + } + if (localReadBytes < 0) { + if (readBytes == 0) { + return -1; + } else { + break; + } + } else if (localReadBytes == 0) { + break; + } + readBytes += localReadBytes; + } while (readBytes < length); + + return readBytes; + } + + public int setBytes(int index, FileChannel in, int position, int length) + throws IOException { + Preconditions.checkPositionIndexes(index, index + length, this.length); + index += offset; + ByteBuffer buf = ByteBuffer.wrap(data, index, length); + int readBytes = 0; + + do { + int localReadBytes; + try { + localReadBytes = in.read(buf, position + readBytes); + } catch (ClosedChannelException e) { + localReadBytes = -1; + } + if (localReadBytes < 0) { + if (readBytes == 0) { + return -1; + } else { + break; + } + } else if (localReadBytes == 0) { + break; + } + readBytes += localReadBytes; + } while (readBytes < length); + + return readBytes; + } + + public Slice copySlice() { + return copySlice(0, length); + } + + /** + * Returns a copy of this buffer's sub-region. Modifying the content of + * the returned buffer or this buffer does not affect each other at all. 
+ */ + public Slice copySlice(int index, int length) { + Preconditions.checkPositionIndexes(index, index + length, this.length); + + index += offset; + byte[] copiedArray = new byte[length]; + System.arraycopy(data, index, copiedArray, 0, length); + return new Slice(copiedArray); + } + + public byte[] copyBytes() { + return copyBytes(0, length); + } + + public byte[] copyBytes(int index, int length) { + Preconditions.checkPositionIndexes(index, index + length, this.length); + index += offset; + if (index == 0) { + return Arrays.copyOf(data, length); + } else { + byte[] value = new byte[length]; + System.arraycopy(data, index, value, 0, length); + return value; + } + } + + /** + * Returns a slice of this buffer's readable bytes. Modifying the content + * of the returned buffer or this buffer affects each other's content + * while they maintain separate indexes and marks. + */ + public Slice slice() { + return slice(0, length); + } + + /** + * Returns a slice of this buffer's sub-region. Modifying the content of + * the returned buffer or this buffer affects each other's content while + * they maintain separate indexes and marks. + */ + public Slice slice(int index, int length) { + if (index == 0 && length == this.length) { + return this; + } + + Preconditions.checkPositionIndexes(index, index + length, this.length); + if (index >= 0 && length == 0) { + return Slices.EMPTY_SLICE; + } + return new Slice(data, offset + index, length); + } + + /** + * Converts this buffer's readable bytes into a NIO buffer. The returned + * buffer shares the content with this buffer. + */ + public ByteBuffer toByteBuffer() { + return toByteBuffer(0, length); + } + + /** + * Converts this buffer's sub-region into a NIO buffer. The returned + * buffer shares the content with this buffer. + */ + public ByteBuffer toByteBuffer(int index, int length) { + Preconditions.checkPositionIndexes(index, index + length, this.length); + index += offset; + return ByteBuffer.wrap(data, index, length).order(LITTLE_ENDIAN); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + Slice slice = (Slice) o; + + // do lengths match + if (length != slice.length) { + return false; + } + + // if arrays have same base offset, some optimizations can be taken... + if (offset == slice.offset && data == slice.data) { + return true; + } + for (int i = 0; i < length; i++) { + if (data[offset + i] != slice.data[slice.offset + i]) { + return false; + } + } + return true; + } + + @Override + public int hashCode() { + if (hash != 0) { + return hash; + } + + int result = length; + for (int i = offset; i < offset + length; i++) { + result = 31 * result + data[i]; + } + if (result == 0) { + result = 1; + } + hash = result; + return hash; + } + + /** + * Compares the content of the specified buffer to the content of this + * buffer. This comparison is performed byte by byte using an unsigned + * comparison. 
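+   * For example, a byte of {@code 0x80} sorts after {@code 0x7F}, because each
+   * byte is masked to an unsigned value before comparison.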
+ */ + @Override + public int compareTo(Slice that) { + if (this == that) { + return 0; + } + if (this.data == that.data && length == that.length && offset == that.offset) { + return 0; + } + + int minLength = Math.min(this.length, that.length); + for (int i = 0; i < minLength; i++) { + int thisByte = 0xFF & this.data[this.offset + i]; + int thatByte = 0xFF & that.data[that.offset + i]; + if (thisByte != thatByte) { + return thisByte - thatByte; + } + } + return this.length - that.length; + } + + /** + * Decodes this buffer's readable bytes into a string with the specified + * character set name. + */ + public String toString(Charset charset) { + return toString(0, length, charset); + } + + /** + * Decodes this buffer's sub-region into a string with the specified + * character set. + */ + public String toString(int index, int length, Charset charset) { + if (length == 0) { + return ""; + } + + return Slices.decodeString(toByteBuffer(index, length), charset); + } + + @Override + public String toString() { + return getClass().getSimpleName() + '(' + + "length=" + length() + + ')'; + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Slices.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Slices.java new file mode 100644 index 0000000000..d3d362eeb6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/Slices.java @@ -0,0 +1,216 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * Copyright 2011 Dain Sundstrom + * Copyright 2011 FuseSource Corp. http://fusesource.com + */ + +package org.apache.kudu.util; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.util.IdentityHashMap; +import java.util.Map; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public final class Slices { + /** + * A buffer whose capacity is {@code 0}. 
+ */ + public static final Slice EMPTY_SLICE = new Slice(0); + + private Slices() { + } + + public static Slice ensureSize(Slice existingSlice, int minWritableBytes) { + if (existingSlice == null) { + existingSlice = EMPTY_SLICE; + } + + if (minWritableBytes <= existingSlice.length()) { + return existingSlice; + } + + int newCapacity; + if (existingSlice.length() == 0) { + newCapacity = 1; + } else { + newCapacity = existingSlice.length(); + } + int minNewCapacity = existingSlice.length() + minWritableBytes; + while (newCapacity < minNewCapacity) { + newCapacity <<= 1; + } + + Slice newSlice = Slices.allocate(newCapacity); + newSlice.setBytes(0, existingSlice, 0, existingSlice.length()); + return newSlice; + } + + public static Slice allocate(int capacity) { + if (capacity == 0) { + return EMPTY_SLICE; + } + return new Slice(capacity); + } + + public static Slice wrappedBuffer(byte[] array) { + if (array.length == 0) { + return EMPTY_SLICE; + } + return new Slice(array); + } + + public static Slice copiedBuffer(ByteBuffer source, int sourceOffset, int length) { + Preconditions.checkNotNull(source, "source is null"); + int newPosition = source.position() + sourceOffset; + return copiedBuffer((ByteBuffer) source.duplicate().order(ByteOrder.LITTLE_ENDIAN).clear() + .limit(newPosition + length).position(newPosition)); + } + + public static Slice copiedBuffer(ByteBuffer source) { + Preconditions.checkNotNull(source, "source is null"); + Slice copy = allocate(source.limit() - source.position()); + copy.setBytes(0, source.duplicate().order(ByteOrder.LITTLE_ENDIAN)); + return copy; + } + + public static Slice copiedBuffer(String string, Charset charset) { + Preconditions.checkNotNull(string, "string is null"); + Preconditions.checkNotNull(charset, "charset is null"); + + return wrappedBuffer(string.getBytes(charset)); + } + + public static ByteBuffer encodeString(CharBuffer src, Charset charset) { + final CharsetEncoder encoder = getEncoder(charset); + final ByteBuffer dst = ByteBuffer.allocate( + (int) ((double) src.remaining() * encoder.maxBytesPerChar())); + try { + CoderResult cr = encoder.encode(src, dst, true); + if (!cr.isUnderflow()) { + cr.throwException(); + } + cr = encoder.flush(dst); + if (!cr.isUnderflow()) { + cr.throwException(); + } + } catch (CharacterCodingException x) { + throw new IllegalStateException(x); + } + dst.flip(); + return dst; + } + + public static String decodeString(ByteBuffer src, Charset charset) { + final CharsetDecoder decoder = getDecoder(charset); + final CharBuffer dst = CharBuffer.allocate( + (int) ((double) src.remaining() * decoder.maxCharsPerByte())); + try { + CoderResult cr = decoder.decode(src, dst, true); + if (!cr.isUnderflow()) { + cr.throwException(); + } + cr = decoder.flush(dst); + if (!cr.isUnderflow()) { + cr.throwException(); + } + } catch (CharacterCodingException x) { + throw new IllegalStateException(x); + } + return dst.flip().toString(); + } + + private static final ThreadLocal> encoders = + new ThreadLocal>() { + @Override + protected Map initialValue() { + return new IdentityHashMap<>(); + } + }; + + private static final ThreadLocal> decoders = + new ThreadLocal>() { + @Override + protected Map initialValue() { + return new IdentityHashMap<>(); + } + }; + + /** + * Returns a cached thread-local {@link CharsetEncoder} for the specified + * charset. 
+ */ + private static CharsetEncoder getEncoder(Charset charset) { + if (charset == null) { + throw new NullPointerException("charset"); + } + + Map map = encoders.get(); + CharsetEncoder e = map.get(charset); + if (e != null) { + e.reset(); + e.onMalformedInput(CodingErrorAction.REPLACE); + e.onUnmappableCharacter(CodingErrorAction.REPLACE); + return e; + } + + e = charset.newEncoder(); + e.onMalformedInput(CodingErrorAction.REPLACE); + e.onUnmappableCharacter(CodingErrorAction.REPLACE); + map.put(charset, e); + return e; + } + + + /** + * Returns a cached thread-local {@link CharsetDecoder} for the specified + * charset. + */ + private static CharsetDecoder getDecoder(Charset charset) { + if (charset == null) { + throw new NullPointerException("charset"); + } + + Map map = decoders.get(); + CharsetDecoder d = map.get(charset); + if (d != null) { + d.reset(); + d.onMalformedInput(CodingErrorAction.REPLACE); + d.onUnmappableCharacter(CodingErrorAction.REPLACE); + return d; + } + + d = charset.newDecoder(); + d.onMalformedInput(CodingErrorAction.REPLACE); + d.onUnmappableCharacter(CodingErrorAction.REPLACE); + map.put(charset, d); + return d; + } + +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/StringUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/StringUtil.java new file mode 100644 index 0000000000..fe2855dd2c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/StringUtil.java @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class StringUtil { + + /** Non-constructable utility class. */ + private StringUtil() { + } + + /** + * Escapes the provided string and appends it to the string builder. The + * escaping is done according to the Hive/Impala escaping rules. Adapted from + * org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.escapeSQLString, with + * one difference: '%' and '_' are not escaped, since the resulting escaped + * string should not be used for a LIKE statement. 
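+   * For example, {@code O'Brien} is appended as {@code O\'Brien}, and a newline
+   * character is appended as the two characters {@code \n}.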
+ */ + public static void appendEscapedSQLString(String s, StringBuilder sb) { + for (int i = 0; i < s.length(); i++) { + char currentChar = s.charAt(i); + switch (currentChar) { + case '\0': { + sb.append("\\0"); + break; + } + case '\'': { + sb.append("\\'"); + break; + } + case '\"': { + sb.append("\\\""); + break; + } + case '\b': { + sb.append("\\b"); + break; + } + case '\n': { + sb.append("\\n"); + break; + } + case '\r': { + sb.append("\\r"); + break; + } + case '\t': { + sb.append("\\t"); + break; + } + case '\\': { + sb.append("\\\\"); + break; + } + case '\u001A': { + sb.append("\\Z"); + break; + } + default: { + if (currentChar < ' ') { + sb.append("\\u"); + String hex = Integer.toHexString(currentChar); + for (int j = 4; j > hex.length(); --j) { + sb.append('0'); + } + sb.append(hex); + } else { + sb.append(currentChar); + } + } + } + } + } +} diff --git a/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/TimestampUtil.java b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/TimestampUtil.java new file mode 100644 index 0000000000..71d405324f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/main/java/org/apache/kudu/util/TimestampUtil.java @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.util; + +import java.sql.Timestamp; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; + +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +public class TimestampUtil { + + /** Non-constructable utility class. */ + private TimestampUtil() { + } + + // Thread local DateFormat since they're not thread-safe. + private static final ThreadLocal DATE_FORMAT = new ThreadLocal() { + @Override + protected DateFormat initialValue() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); + sdf.setTimeZone(TimeZone.getTimeZone("UTC")); + return sdf; + } + }; + + /** + * Converts a {@link Timestamp} to microseconds since the Unix epoch (1970-01-01T00:00:00Z). + * + * Note: Timestamp instances with nanosecond precision are truncated to microseconds. + * + * @param timestamp the timestamp to convert to microseconds + * @return the microseconds since the Unix epoch + */ + public static long timestampToMicros(Timestamp timestamp) { + // Number of whole milliseconds since the Unix epoch, in microseconds. + long millis = timestamp.getTime() * 1000L; + // Sub millisecond time since the Unix epoch, in microseconds. 
+ long micros = (timestamp.getNanos() % 1000000L) / 1000L; + if (micros >= 0) { + return millis + micros; + } else { + return millis + 1000000L + micros; + } + } + + /** + * Converts a microsecond offset from the Unix epoch (1970-01-01T00:00:00Z) + * to a {@link Timestamp}. + * + * @param micros the offset in microseconds since the Unix epoch + * @return the corresponding timestamp + */ + public static Timestamp microsToTimestamp(long micros) { + long millis = micros / 1000L; + long nanos = (micros % 1000000L) * 1000L; + if (nanos < 0) { + millis -= 1L; + nanos += 1000000000L; + } + Timestamp timestamp = new Timestamp(millis); + timestamp.setNanos((int) nanos); + return timestamp; + } + + /** + * Transforms a timestamp into a string, whose formatting and timezone is consistent + * across Kudu. + * @param timestamp the timestamp + * @return a string, in the format: YYYY-MM-DDTHH:MM:SS.ssssssZ + */ + public static String timestampToString(Timestamp timestamp) { + long micros = timestampToMicros(timestamp); + return timestampToString(micros); + } + + /** + * Transforms a timestamp into a string, whose formatting and timezone is consistent + * across Kudu. + * @param micros the timestamp, in microseconds + * @return a string, in the format: YYYY-MM-DDTHH:MM:SS.ssssssZ + */ + public static String timestampToString(long micros) { + long tsMillis = micros / 1000L; + long tsMicros = micros % 1000000L; + String tsStr = DATE_FORMAT.get().format(new Date(tsMillis)); + return String.format("%s.%06dZ", tsStr, tsMicros); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/TestSchema.java b/java-scala-spark4/kudu-client/src/test/java/org/TestSchema.java new file mode 100644 index 0000000000..9fecd7c38c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/TestSchema.java @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; + +import org.junit.Test; + +public class TestSchema { + + @Test + public void testEquals() { + final ColumnSchema col1 = new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build(); + final ColumnSchema col2 = new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .build(); + + ArrayList columns = new ArrayList<>(); + columns.add(col1); + columns.add(col2); + final Schema schema = new Schema(columns); + + ArrayList columns1 = new ArrayList<>(); + columns1.add(col1); + columns1.add(col2); + final Schema schema1 = new Schema(columns1); + + // Two objects are the same. + assertTrue(schema1.equals(schema1)); + // One of object is not type of 'Schema'. 
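+    // equals() should return false when compared against a non-Schema object.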
+ assertFalse(schema1.equals(columns1)); + // Two schemas are the same structure. + assertTrue(schema1.equals(schema)); + + final ColumnSchema col3 = new ColumnSchema.ColumnSchemaBuilder("c2", Type.INT32) + .nullable(false) + .key(true) + .build(); + + ArrayList columns2 = new ArrayList<>(); + columns2.add(col1); + columns2.add(col3); + final Schema schema2 = new Schema(columns2); + + // Two schemas have different number of primary keys. + assertFalse(schema1.equals(schema2)); + + ArrayList columns3 = new ArrayList<>(); + columns3.add(col1); + columns3.add(col2); + columns3.add(col3); + final Schema schema3 = new Schema(columns3); + + // Two schemas have different number of columns. + assertFalse(schema1.equals(schema3)); + + final ColumnSchema col4 = new ColumnSchema.ColumnSchemaBuilder("c3", Type.INT32) + .nullable(false) + .build(); + + ArrayList columns4 = new ArrayList<>(); + columns4.add(col1); + columns4.add(col2); + columns4.add(col4); + final Schema schema4 = new Schema(columns4); + + final ColumnSchema col5 = new ColumnSchema.ColumnSchemaBuilder("c4", Type.INT32) + .nullable(false) + .build(); + ArrayList columns5 = new ArrayList<>(); + columns5.add(col1); + columns5.add(col2); + columns5.add(col5); + final Schema schema5 = new Schema(columns5); + + // Two schemas have different column names. + assertFalse(schema4.equals(schema5)); + + final ColumnSchema col6 = new ColumnSchema.ColumnSchemaBuilder("c4", Type.STRING) + .nullable(false) + .build(); + + ArrayList columns6 = new ArrayList<>(); + columns6.add(col1); + columns6.add(col2); + columns6.add(col6); + final Schema schema6 = new Schema(columns6); + + // Two schemas have different column types. + assertFalse(schema5.equals(schema6)); + + ArrayList columns7 = new ArrayList<>(); + columns7.add(col1); + columns7.add(col6); + columns7.add(col2); + final Schema schema7 = new Schema(columns7); + + // Two schemas have different sequence of columns. + assertFalse(schema6.equals(schema7)); + + final ColumnSchema col7 = new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(true) + .build(); + // Two column schemas with exact the same types, names, sequence of columns + // but different nullability for a non-key column + ArrayList columns8 = new ArrayList<>(); + columns7.add(col1); + columns7.add(col6); + columns7.add(col7); + final Schema schema8 = new Schema(columns8); + + assertFalse(schema7.equals(schema8)); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/TestColumnSchema.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/TestColumnSchema.java new file mode 100644 index 0000000000..c1fd3c9e57 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/TestColumnSchema.java @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.function.ThrowingRunnable; + +import org.apache.kudu.ColumnSchema.AutoIncrementingColumnSchemaBuilder; +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder; +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.util.CharUtil; +import org.apache.kudu.util.DecimalUtil; + +public class TestColumnSchema { + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + public void testToString() { + final ColumnSchema col1 = new ColumnSchemaBuilder("col1", Type.STRING).build(); + final ColumnSchema col2 = new ColumnSchemaBuilder("col2", Type.INT64).build(); + final ColumnSchema col3 = new ColumnSchemaBuilder("col3", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(5, 2)) + .build(); + final ColumnSchema col4 = new ColumnSchemaBuilder("col4", Type.INT16) + .comment("test comment").build(); + + assertEquals("Column name: col1, type: string", col1.toString()); + assertEquals("Column name: col2, type: int64", col2.toString()); + assertEquals("Column name: col3, type: decimal(5, 2)", col3.toString()); + assertEquals("Column name: col4, type: int16, comment: test comment", col4.toString()); + } + + @Test + public void testEquals() { + ColumnSchema stringCol1 = new ColumnSchemaBuilder("col1", Type.STRING) + .defaultValue("test") + .build(); + // Same instance + assertEquals(stringCol1, stringCol1); + + // Same value + ColumnSchema stringCol2 = new ColumnSchemaBuilder("col1", Type.STRING) + .defaultValue("test") + .build(); + assertEquals(stringCol1, stringCol2); + + // Different by key + ColumnSchema isKey = new ColumnSchemaBuilder("col1", Type.STRING) + .key(true) + .build(); + Assert.assertTrue(isKey.isKey()); + assertNotEquals(stringCol1, isKey); + + // Difference between key and nonUniqueKey + ColumnSchema isNonUniqueKey = new ColumnSchemaBuilder("col1", Type.STRING) + .nonUniqueKey(true) + .build(); + Assert.assertTrue(isNonUniqueKey.isKey()); + Assert.assertFalse(isNonUniqueKey.isKeyUnique()); + assertNotEquals(isKey, isNonUniqueKey); + + // Different by type + ColumnSchema isInt = new ColumnSchemaBuilder("col1", Type.INT32) + .build(); + assertNotEquals(stringCol1, isInt); + + // Same with type attributes + ColumnSchema decCol1 = new ColumnSchemaBuilder("col1", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(9, 2)) + .build(); + ColumnSchema decCol2 = new ColumnSchemaBuilder("col1", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(9, 2)) + .build(); + assertEquals(decCol1, decCol2); + + // Different by type attributes + ColumnSchema decCol3 = new ColumnSchemaBuilder("col1", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(9, 0)) + .build(); + assertNotEquals(decCol1, decCol3); + + // Same with comment + ColumnSchema commentInt1 = new ColumnSchemaBuilder("col1", Type.INT32).comment("test").build(); + ColumnSchema commentInt2 = new ColumnSchemaBuilder("col1", Type.INT32).comment("test").build(); + assertEquals(commentInt1, commentInt2); + + // Different by comment + ColumnSchema commentInt3 = new ColumnSchemaBuilder("col1", Type.INT32).comment("Test").build(); + assertNotEquals(commentInt1, commentInt3); + } + + @Test + public void testOutOfRangeVarchar() throws Exception { + Throwable thrown = 
Assert.assertThrows(IllegalArgumentException.class, new ThrowingRunnable() { + @Override + public void run() throws Exception { + new ColumnSchemaBuilder("col1", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(70000)).build(); + } + }); + Assert.assertTrue(thrown.getMessage() + .contains("VARCHAR's length must be set and between 1 and 65535")); + } + + @Test + public void testVarcharWithoutLength() throws Exception { + Throwable thrown = Assert.assertThrows(IllegalArgumentException.class, new ThrowingRunnable() { + @Override + public void run() throws Exception { + new ColumnSchemaBuilder("col1", Type.VARCHAR).build(); + } + }); + Assert.assertTrue(thrown.getMessage() + .contains("VARCHAR's length must be set and between 1 and 65535")); + } + + @Test + public void testAutoIncrementing() throws Exception { + // Create auto-incrementing column with AutoIncrementingColumnSchemaBuilder + ColumnSchema autoIncrementing = new AutoIncrementingColumnSchemaBuilder().build(); + Assert.assertTrue(autoIncrementing.isAutoIncrementing()); + assertEquals(Schema.getAutoIncrementingColumnType(), autoIncrementing.getType()); + Assert.assertTrue(autoIncrementing.isKey()); + Assert.assertFalse(autoIncrementing.isKeyUnique()); + Assert.assertFalse(autoIncrementing.isNullable()); + Assert.assertFalse(autoIncrementing.isImmutable()); + assertEquals(null, autoIncrementing.getDefaultValue()); + + // Create column with auto-incrementing column name with ColumnSchemaBuilder + Throwable thrown = Assert.assertThrows(IllegalArgumentException.class, new ThrowingRunnable() { + @Override + public void run() throws Exception { + new ColumnSchemaBuilder(Schema.getAutoIncrementingColumnName(), + Schema.getAutoIncrementingColumnType()).build(); + } + }); + Assert.assertTrue(thrown.getMessage().contains("Column name " + + Schema.getAutoIncrementingColumnName() + " is reserved by Kudu engine")); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/TestType.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/TestType.java new file mode 100644 index 0000000000..2552a6789d --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/TestType.java @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu; + +import static org.junit.Assert.assertEquals; + +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.junit.RetryRule; + +public class TestType { + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + public void testGetTypeForName() { + String name = Type.INT64.getName(); + Type newType = Type.getTypeForName(name); + + assertEquals("Get Type from getName()", Type.INT64, newType); + + name = Type.INT64.name(); + newType = Type.getTypeForName(name); + + assertEquals("Get Type from name()", Type.INT64, newType); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITClient.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITClient.java new file mode 100644 index 0000000000..5e5943f8dc --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITClient.java @@ -0,0 +1,464 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableList; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.RandomUtils; + +/** + * Integration test for the client. RPCs are sent to Kudu from multiple threads while processes + * are restarted and failures are injected. + * + * By default this test runs for 60 seconds, but this can be changed by passing a different value + * in "itclient.runtime.seconds". For example: + * "mvn test -Dtest=ITClient -Ditclient.runtime.seconds=120". + */ +public class ITClient { + + private static final Logger LOG = LoggerFactory.getLogger(ITClient.class); + + private static final String RUNTIME_PROPERTY_NAME = "itclient.runtime.seconds"; + private static final long DEFAULT_RUNTIME_SECONDS = 60; + + // Time we'll spend waiting at the end of the test for things to settle. Also + // the minimum this test can run for. + private static final long TEST_MIN_RUNTIME_SECONDS = 2; + + private static final long TEST_TIMEOUT_SECONDS = 600000; + + private static final String TABLE_NAME = + ITClient.class.getName() + "-" + System.currentTimeMillis(); + + // Tracks whether it's time for the test to end or not. 
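+  // Worker threads keep looping while the count is above zero; counting the latch
+  // down (on an error or at shutdown) tells them all to stop.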
+ private CountDownLatch keepRunningLatch; + + // If the test fails, will contain an exception that describes the failure. + private Exception failureException; + + private KuduTable table; + private long runtimeInSeconds; + + private volatile long sharedWriteTimestamp; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() throws Exception { + // Set (or reset, in the event of a retry) test state. + keepRunningLatch = new CountDownLatch(1); + failureException = null; + sharedWriteTimestamp = 0; + + // Extract and verify the test's running time. + String runtimeProp = System.getProperty(RUNTIME_PROPERTY_NAME); + runtimeInSeconds = runtimeProp == null ? DEFAULT_RUNTIME_SECONDS : Long.parseLong(runtimeProp); + if (runtimeInSeconds < TEST_MIN_RUNTIME_SECONDS || runtimeInSeconds > TEST_TIMEOUT_SECONDS) { + Assert.fail("This test needs to run more than " + TEST_MIN_RUNTIME_SECONDS + " seconds" + + " and less than " + TEST_TIMEOUT_SECONDS + " seconds"); + } + LOG.info("Test will run for {} seconds", runtimeInSeconds); + + // Create the test table. + CreateTableOptions builder = new CreateTableOptions().setNumReplicas(3); + builder.setRangePartitionColumns(ImmutableList.of("key")); + table = harness.getClient().createTable(TABLE_NAME, getBasicSchema(), builder); + } + + @Test(timeout = TEST_TIMEOUT_SECONDS) + public void test() throws Exception { + List threads = new ArrayList<>(); + threads.add(new Thread(new ChaosThread(), "chaos-test-thread")); + threads.add(new Thread(new WriterThread(), "writer-test-thread")); + threads.add(new Thread(new ScannerThread(), "scanner-test-thread")); + + for (Thread thread : threads) { + thread.setUncaughtExceptionHandler(new UncaughtExceptionHandler()); + thread.start(); + } + + // If we time out here, the test ran to completion and passed. Otherwise, a + // count down was triggered from an error and the test failed. + boolean failure = keepRunningLatch.await(runtimeInSeconds, TimeUnit.SECONDS); + if (!failure) { + // The test passed but the threads are still running; tell them to stop. + keepRunningLatch.countDown(); + } + + for (Thread thread : threads) { + // Give plenty of time for threads to stop. + thread.join(DEFAULT_SLEEP); + } + + if (failure) { + throw failureException; + } + + // If the test passed, do some extra validation at the end. + AsyncKuduScanner scannerBuilder = harness.getAsyncClient() + .newScannerBuilder(table) + .build(); + int rowCount = countRowsInScan(scannerBuilder); + Assert.assertTrue(rowCount + " should be higher than 0", rowCount > 0); + } + + /** + * Logs an error message and triggers the count down latch, stopping this test. + * @param message error message to print + * @param exception optional exception to print + */ + private void reportError(String message, Exception exception) { + failureException = new Exception(message, exception); + keepRunningLatch.countDown(); + } + + /** + * Thread that introduces chaos in the cluster, one at a time. 
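+   * Each iteration randomly restarts a tablet server, force-disconnects a cached
+   * client connection, or restarts the leader master, then waits a few seconds
+   * before the next round.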
+ */ + class ChaosThread implements Runnable { + + private final Random random = RandomUtils.getRandom(); + + @Override + public void run() { + try { + keepRunningLatch.await(2, TimeUnit.SECONDS); + } catch (InterruptedException e) { + return; + } + while (keepRunningLatch.getCount() > 0) { + try { + boolean shouldContinue; + int randomInt = random.nextInt(3); + if (randomInt == 0) { + shouldContinue = restartTS(); + } else if (randomInt == 1) { + shouldContinue = disconnectNode(); + } else { + shouldContinue = restartMaster(); + } + + if (!shouldContinue) { + return; + } + keepRunningLatch.await(5, TimeUnit.SECONDS); + } catch (InterruptedException e) { + return; + } + + } + } + + /** + * Failure injection. Picks a random tablet server from the client's cache and force + * disconnects it. + * @return true if successfully completed or didn't find a server to disconnect, false it it + * encountered a failure + */ + @SuppressWarnings("FutureReturnValueIgnored") + private boolean disconnectNode() { + try { + final List connections = harness.getAsyncClient().getConnectionListCopy(); + if (connections.isEmpty()) { + return true; + } + connections.get(random.nextInt(connections.size())).disconnect(); + } catch (Exception e) { + if (keepRunningLatch.getCount() == 0) { + // Likely shutdown() related. + return false; + } + reportError("Couldn't disconnect a TS", e); + return false; + } + return true; + } + + /** + * Forces the restart of a random tablet server. + * @return true if it successfully completed, false if it failed + */ + private boolean restartTS() { + try { + harness.restartTabletServer(table); + } catch (Exception e) { + reportError("Couldn't restart a TS", e); + return false; + } + return true; + } + + /** + * Forces the restart of the master. + * @return true if it successfully completed, false if it failed + */ + private boolean restartMaster() { + try { + harness.restartLeaderMaster(); + } catch (Exception e) { + reportError("Couldn't restart a master", e); + return false; + } + return true; + } + + } + + /** + * Thread that writes sequentially to the table. Every 10 rows it considers setting the flush mode + * to MANUAL_FLUSH or AUTO_FLUSH_SYNC. + */ + class WriterThread implements Runnable { + + private final KuduSession session = harness.getClient().newSession(); + private final Random random = RandomUtils.getRandom(); + private int currentRowKey = 0; + + @Override + public void run() { + session.setExternalConsistencyMode(ExternalConsistencyMode.CLIENT_PROPAGATED); + while (keepRunningLatch.getCount() > 0) { + try { + OperationResponse resp = session.apply(createBasicSchemaInsert(table, currentRowKey)); + if (hasRowErrorAndReport(resp)) { + return; + } + currentRowKey++; + + // Every 10 rows we flush and change the flush mode randomly. + if (currentRowKey % 10 == 0) { + + // First flush any accumulated rows before switching. + List responses = session.flush(); + if (responses != null) { + for (OperationResponse batchedResp : responses) { + if (hasRowErrorAndReport(batchedResp)) { + return; + } + } + } + + if (random.nextBoolean()) { + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + } else { + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + } + } + } catch (Exception e) { + if (keepRunningLatch.getCount() == 0) { + // Likely shutdown() related. 
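+          // Exit quietly instead of reporting it as a test failure.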
+ return; + } + reportError("Got error while inserting row " + currentRowKey, e); + return; + } + } + } + + private boolean hasRowErrorAndReport(OperationResponse resp) { + if (resp != null && resp.hasRowError()) { + reportError("The following RPC " + resp.getOperation().getRow() + + " returned this error: " + resp.getRowError(), null); + return true; + } + + if (resp == null) { + return false; + } + + sharedWriteTimestamp = resp.getWriteTimestampRaw(); + + return false; + } + } + + /** + * Thread that scans the table. Alternates randomly between random gets and full table scans. + */ + class ScannerThread implements Runnable { + + private final Random random = RandomUtils.getRandom(); + + // Updated by calling a full scan. + private int lastRowCount = 0; + + @Override + public void run() { + while (keepRunningLatch.getCount() > 0) { + boolean shouldContinue; + + // First check if we've written at least one row. + if (sharedWriteTimestamp == 0) { + shouldContinue = true; + } else if (lastRowCount == 0 || // Need to full scan once before random reading + random.nextBoolean()) { + shouldContinue = fullScan(); + } else { + shouldContinue = randomGet(); + } + + if (!shouldContinue) { + return; + } + + if (lastRowCount == 0) { + try { + keepRunningLatch.await(50, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // Test is stopping. + return; + } + } + } + } + + /** + * Reads a row at random that should exist (smaller than lastRowCount). + * @return true if the get was successful, false if there was an error + */ + private boolean randomGet() { + int key = random.nextInt(lastRowCount); + KuduPredicate predicate = KuduPredicate.newComparisonPredicate( + table.getSchema().getColumnByIndex(0), KuduPredicate.ComparisonOp.EQUAL, key); + KuduScanner scanner = getScannerBuilder() + .addPredicate(predicate) + .build(); + + List results = new ArrayList<>(); + for (RowResult row : scanner) { + results.add(row); + } + if (results.size() != 1) { + reportError("Random get got 0 or many rows " + results.size() + " for key " + key, null); + return false; + } + + int receivedKey = results.get(0).getInt(0); + if (receivedKey != key) { + reportError("Tried to get key " + key + " and received " + receivedKey, null); + return false; + } + return true; + } + + /** + * Runs a full table scan and updates the lastRowCount. + * @return true if the full scan was successful, false if there was an error + */ + private boolean fullScan() { + int rowCount; + TimeoutTracker timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(DEFAULT_SLEEP); + + while (keepRunningLatch.getCount() > 0 && !timeoutTracker.timedOut()) { + KuduScanner scanner = getScannerBuilder().build(); + + try { + rowCount = countRowsInScan(scanner); + } catch (KuduException e) { + return checkAndReportError("Got error while row counting", e); + } + + if (rowCount >= lastRowCount) { + if (rowCount > lastRowCount) { + lastRowCount = rowCount; + LOG.info("New row count {}", lastRowCount); + } + return true; + } else { + reportError("Row count unexpectedly decreased from " + lastRowCount + " to " + rowCount, + null); + } + + // Due to the lack of KUDU-430, we need to loop for a while. + try { + keepRunningLatch.await(50, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // No need to do anything, we'll exit the loop once we test getCount() in the condition. 
+ } + } + return !timeoutTracker.timedOut(); + } + + private KuduScanner.KuduScannerBuilder getScannerBuilder() { + return harness.getClient().newScannerBuilder(table) + .readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT) + .snapshotTimestampRaw(sharedWriteTimestamp) + .setFaultTolerant(true); + } + + /** + * Checks the passed exception contains "Scanner not found". If it does then it returns true, + * else it reports the error and returns false. + * We need to do this because the scans in this client aren't fault tolerant. + * @param message message to print if the exception contains a real error + * @param e the exception to check + * @return true if the scanner failed on a non-FATAL error, otherwise false which will kill + * this test + */ + private boolean checkAndReportError(String message, KuduException e) { + // It's possible to get timeouts if we're unlucky. A particularly common one is + // "could not wait for desired snapshot timestamp to be consistent" since we're using + // READ_AT_SNAPSHOT scanners. + // TODO revisit once KUDU-1656 is taken care of. + if (e.getStatus().isTimedOut()) { + LOG.warn("Received a scan timeout", e); + return true; + } + // Do nasty things, expect nasty results. The scanners are a bit too happy to retry TS + // disconnections so we might end up retrying a scanner on a node that restarted, or we might + // get disconnected just after sending an RPC so when we reconnect to the same TS we might get + // the "Invalid call sequence ID" message. + if (!e.getStatus().isNotFound() && + !e.getStatus().getMessage().contains("Invalid call sequence ID")) { + reportError(message, e); + return false; + } + return true; + } + } + + private class UncaughtExceptionHandler implements Thread.UncaughtExceptionHandler { + @Override + public void uncaughtException(Thread t, Throwable e) { + // Only report an error if we're still running, else we'll spam the log. + if (keepRunningLatch.getCount() != 0) { + reportError("Uncaught exception", new Exception(e)); + } + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITClientStress.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITClientStress.java new file mode 100644 index 0000000000..bf4962ea87 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITClientStress.java @@ -0,0 +1,180 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.createFourTabletsTableWithNineRows; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.Callable; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import com.google.common.base.Stopwatch; +import com.google.common.base.Supplier; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Schema; +import org.apache.kudu.client.SessionConfiguration.FlushMode; +import org.apache.kudu.test.CapturingLogAppender; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; + +public class ITClientStress { + private static final Logger LOG = LoggerFactory.getLogger(ITClientStress.class); + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @SuppressWarnings("FutureReturnValueIgnored") + private void runTasks(int numThreads, int secondsToRun, + Supplier> taskFactory) throws InterruptedException, IOException { + // Capture any exception thrown by the tasks. + final AtomicReference thrown = new AtomicReference<>(null); + + // Setup a pool with synchronous handoff. + SynchronousQueue queue = new SynchronousQueue<>(); + ThreadPoolExecutor pool = new ThreadPoolExecutor( + numThreads, numThreads, 100, TimeUnit.MILLISECONDS, + queue, new ThreadPoolExecutor.CallerRunsPolicy()); + + // Capture logs so we can check that no exceptions are logged. + CapturingLogAppender cla = new CapturingLogAppender(); + try (Closeable c = cla.attach()) { + Stopwatch s = Stopwatch.createStarted(); + while (s.elapsed(TimeUnit.SECONDS) < secondsToRun && + thrown.get() == null) { + final Callable task = taskFactory.get(); + // Wrap the task so that if it throws an exception, we stop and + // fail the test. + Runnable wrapped = new Runnable() { + @Override + public void run() { + try { + task.call(); + } catch (Throwable t) { + thrown.set(t); + } + } + }; + pool.submit(wrapped); + } + } finally { + pool.shutdown(); + assertTrue(pool.awaitTermination(10, TimeUnit.SECONDS)); + } + if (thrown.get() != null) { + throw new AssertionError(thrown.get()); + } + assertFalse("log contained NPE", + cla.getAppendedText().contains("NullPointerException")); + assertFalse("log contained SSLException", + cla.getAppendedText().contains("SSLException")); + assertFalse("log contained IllegalStateException", + cla.getAppendedText().contains("IllegalStateException")); + } + + /** + * Regression test for KUDU-1963. This simulates the behavior of the + * Impala 2.8 front-end under a high-concurrency workload. Each query + * starts a new client, fetches scan tokens, and closes the client. 
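+   * A failure in any task, or a NullPointerException, SSLException, or
+   * IllegalStateException in the captured client log, fails the test.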
+ */ + @Test(timeout = 300000) + public void testManyShortClientsGeneratingScanTokens() throws Exception { + final String TABLE_NAME = "testManyClients"; + final int SECONDS_TO_RUN = 10; + final int NUM_THREADS = 80; + createFourTabletsTableWithNineRows(harness.getAsyncClient(), TABLE_NAME, DEFAULT_SLEEP); + + runTasks(NUM_THREADS, SECONDS_TO_RUN, new Supplier>() { + @Override + public Callable get() { + return new Callable() { + @Override + public Void call() throws Exception { + try (AsyncKuduClient client = + new AsyncKuduClient.AsyncKuduClientBuilder(harness.getMasterAddressesAsString()) + .defaultAdminOperationTimeoutMs(DEFAULT_SLEEP) + .build()) { + KuduTable t = client.openTable(TABLE_NAME).join(); + new KuduScanToken.KuduScanTokenBuilder(client, t).build(); + } + return null; + } + }; + } + }); + } + + /** + * Stress test which performs upserts from many sessions on different threads + * sharing the same KuduClient and KuduTable instance. + */ + @Test(timeout = 300000) + public void testMultipleSessions() throws Exception { + final String TABLE_NAME = "testMultipleSessions"; + final int SECONDS_TO_RUN = 10; + final int NUM_THREADS = 60; + final KuduTable table = harness.getClient().createTable(TABLE_NAME, basicSchema, + getBasicCreateTableOptions()); + final AtomicInteger numUpserted = new AtomicInteger(0); + runTasks(NUM_THREADS, SECONDS_TO_RUN, new Supplier>() { + @Override + public Callable get() { + return new Callable() { + @Override + public Void call() throws Exception { + KuduSession s = harness.getClient().newSession(); + s.setFlushMode(FlushMode.AUTO_FLUSH_SYNC); + try { + for (int i = 0; i < 100; i++) { + Upsert u = table.newUpsert(); + u.getRow().addInt(0, i); + u.getRow().addInt(1, 12345); + u.getRow().addInt(2, 3); + u.getRow().setNull(3); + u.getRow().addBoolean(4, false); + OperationResponse apply = s.apply(u); + if (apply.hasRowError()) { + throw new AssertionError(apply.getRowError().toString()); + } + numUpserted.incrementAndGet(); + } + } finally { + s.close(); + } + return null; + } + }; + } + }); + LOG.info("Upserted {} rows", numUpserted.get()); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITFaultTolerantScanner.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITFaultTolerantScanner.java new file mode 100644 index 0000000000..089cfd304f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITFaultTolerantScanner.java @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.junit.Test; + +/** + * Integration test on fault tolerant scanner that inserts enough data + * to trigger flushes and getting multiple data blocks. 
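+ * Individual test cases delegate to the fault-injection helpers in
+ * {@link ITScannerMultiTablet}.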
+ */ +public class ITFaultTolerantScanner extends ITScannerMultiTablet { + /** + * Verifies for fault tolerant scanner, it can proceed + * properly even if disconnects client connection. + */ + @Test(timeout = 100000) + public void testFaultTolerantDisconnect() throws KuduException { + clientFaultInjection(true); + } + + /** + * Tests fault tolerant scanner by restarting the tablet server in the middle + * of tablet scanning and verifies the scan results are as expected. + */ + @Test(timeout = 100000) + public void testFaultTolerantScannerRestartFirstScanRequest() throws Exception { + serverFaultInjection(true, true, false); + } + + /** + * Tests fault tolerant scanner by restarting the tserver in the middle + * of tablet scanning and verifies the scan results are as expected. + * Notice, the fault injection happens at the 2nd ScanRequest or next scan + * request rather than the first scan request. + * @throws Exception + */ + @Test(timeout = 100000) + public void testFaultTolerantScannerRestartAfterSecondScanRequest() throws Exception { + serverFaultInjectionRestartAfterSecondScanRequest(); + } + + /** + * Tests fault tolerant scanner by killing the tablet server in the middle + * of tablet scanning and verifies the scan results are as expected. + */ + @Test(timeout = 100000) + public void testFaultTolerantScannerKill() throws Exception { + serverFaultInjection(false, true, false); + } + + /** + * Tests fault tolerant scanner by killing the tablet server while scanning + * (after finish scan of first tablet) and verifies the scan results are as expected. + */ + @Test(timeout = 100000) + public void testFaultTolerantScannerKillFinishFirstTablet() throws Exception { + serverFaultInjection(false, true, true); + } + + /** + * Tests fault tolerant scanner by restarting the tablet server while scanning + * (after finish scan of first tablet) and verifies the scan results are as expected. + */ + @Test(timeout = 100000) + public void testFaultTolerantScannerRestartFinishFirstTablet() throws Exception { + serverFaultInjection(true, true, true); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITNonFaultTolerantScanner.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITNonFaultTolerantScanner.java new file mode 100644 index 0000000000..9f5a76635c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITNonFaultTolerantScanner.java @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +/** + * Integration test on non fault tolerant scanner that inserts enough data + * to trigger flushes and getting multiple data blocks. + */ +public class ITNonFaultTolerantScanner extends ITScannerMultiTablet { + + /** + * Test for KUDU-1343 with a multi-batch multi-tablet scan. + */ + @Test(timeout = 100000) + public void testKudu1343() throws Exception { + KuduScanner scanner = harness.getClient().newScannerBuilder(table) + .batchSizeBytes(1) // Just a hint, won't actually be that small + .build(); + + int rowCount = 0; + int loopCount = 0; + while (scanner.hasMoreRows()) { + loopCount++; + RowResultIterator rri = scanner.nextRows(); + rowCount += rri.getNumRows(); + } + + assertTrue(loopCount > TABLET_COUNT); + assertEquals(ROW_COUNT, rowCount); + } + + /** + * Verifies for non fault tolerant scanner, it can proceed + * properly even if shuts down client connection. + */ + @Test(timeout = 100000) + public void testNonFaultTolerantDisconnect() throws KuduException { + clientFaultInjection(false); + } + + /** + * Tests non fault tolerant scanner by killing the tablet server while scanning and + * verifies it throws {@link NonRecoverableException} as expected. + */ + @Test(timeout = 100000, expected = NonRecoverableException.class) + public void testNonFaultTolerantScannerKill() throws Exception { + serverFaultInjection(false, false, false); + } + + /** + * Tests non fault tolerant scanner by restarting the tablet server while scanning and + * verifies it throws {@link NonRecoverableException} as expected. + */ + @Test(timeout = 100000, expected = NonRecoverableException.class) + public void testNonFaultTolerantScannerRestart() throws Exception { + serverFaultInjection(true, false, false); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITScannerMultiTablet.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITScannerMultiTablet.java new file mode 100644 index 0000000000..1f61d2c736 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/ITScannerMultiTablet.java @@ -0,0 +1,335 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.Lists; +import org.junit.Before; +import org.junit.Rule; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Schema; +import org.apache.kudu.test.KuduTestHarness; + +/** + * Integration test that inserts enough data to trigger flushes and getting multiple data + * blocks. + */ +public class ITScannerMultiTablet { + + private static final Logger LOG = LoggerFactory.getLogger(ITScannerMultiTablet.class); + private static final String TABLE_NAME = + ITScannerMultiTablet.class.getName() + "-" + System.currentTimeMillis(); + protected static final int ROW_COUNT = 20000; + protected static final int TABLET_COUNT = 3; + protected static final String METRIC_NAME = "total_duration_nanos"; + + private static Schema schema = getBasicSchema(); + protected KuduTable table; + + private static Random random = new Random(1234); + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() throws Exception { + CreateTableOptions builder = new CreateTableOptions(); + + builder.addHashPartitions( + Lists.newArrayList(schema.getColumnByIndex(0).getName()), + TABLET_COUNT); + + table = harness.getClient().createTable(TABLE_NAME, schema, builder); + KuduSession session = harness.getClient().newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + + Set primaryKeys = new HashSet(); + // Getting meaty rows. + char[] chars = new char[1024]; + for (int i = 0; i < ROW_COUNT; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + int id = random.nextInt(); + while (id == Integer.MIN_VALUE || primaryKeys.contains(id)) { + id = random.nextInt(); + } + row.addInt(0, id); + primaryKeys.add(id); + row.addInt(1, i); + row.addInt(2, i); + row.addString(3, new String(chars)); + row.addBoolean(4, true); + session.apply(insert); + } + session.flush(); + session.close(); + // Log on error details. + if (session.countPendingErrors() > 0) { + LOG.info("RowErrorsAndOverflowStatus: {}", session.getPendingErrors().toString()); + } + assertEquals(0, session.countPendingErrors()); + } + + /** + * Injecting failures (kill or restart TabletServer) while scanning, to verify: + * fault tolerant scanner will continue scan and non-fault tolerant scanner will throw + * {@link NonRecoverableException}. + * + * Also makes sure we pass all the correct information down to the server by verifying + * we get rows in order from 3 tablets. We detect those tablet boundaries when keys suddenly + * become smaller than what was previously seen. 
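+   * The scan projects only the key column (index 0) and uses a one-byte batch-size
+   * hint so that each tablet takes multiple scan requests to drain.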
+ * + * @param restart if true restarts TabletServer, otherwise kills TabletServer + * @param isFaultTolerant if true uses fault tolerant scanner, otherwise + * uses non fault-tolerant one + * @param finishFirstScan if true injects failure before finishing first tablet scan, + * otherwise in the middle of tablet scanning + * @throws Exception + */ + void serverFaultInjection(boolean restart, boolean isFaultTolerant, + boolean finishFirstScan) throws Exception { + KuduScanner scanner = harness.getClient().newScannerBuilder(table) + .setFaultTolerant(isFaultTolerant) + .batchSizeBytes(1) + .setProjectedColumnIndexes(Lists.newArrayList(0)).build(); + + try { + int rowCount = 0; + int previousRow = -1; + int tableBoundariesCount = 0; + if (scanner.hasMoreRows()) { + RowResultIterator rri = scanner.nextRows(); + while (rri.hasNext()) { + int key = rri.next().getInt(0); + if (key < previousRow) { + tableBoundariesCount++; + } + previousRow = key; + rowCount++; + } + } + + if (!finishFirstScan) { + if (restart) { + harness.restartTabletServer(scanner.currentTablet()); + } else { + harness.killTabletLeader(scanner.currentTablet()); + } + } + + boolean failureInjected = false; + while (scanner.hasMoreRows()) { + RowResultIterator rri = scanner.nextRows(); + while (rri.hasNext()) { + int key = rri.next().getInt(0); + if (key < previousRow) { + tableBoundariesCount++; + if (finishFirstScan && !failureInjected) { + if (restart) { + harness.restartTabletServer(scanner.currentTablet()); + } else { + harness.killTabletLeader(scanner.currentTablet()); + } + failureInjected = true; + } + } + previousRow = key; + rowCount++; + } + } + + assertEquals(ROW_COUNT, rowCount); + assertEquals(TABLET_COUNT, tableBoundariesCount); + } finally { + scanner.close(); + } + } + + /** + * Inject failures (kill or restart TabletServer) while scanning, + * Inject failure (restart TabletServer) to a tablet's leader while + * scanning after second scan request, to verify: + * a fault tolerant scanner will continue scanning and a non-fault tolerant scanner will throw + * {@link NonRecoverableException}. + * + * @throws Exception + */ + void serverFaultInjectionRestartAfterSecondScanRequest() throws Exception { + // In fact, the test has TABLET_COUNT, default is 3. + // We check the rows' order, no dup rows and loss rows. + // And In the case, we need 2 times or more scan requests, + // so set a minimum batchSizeBytes 1. 
+ KuduScanToken.KuduScanTokenBuilder tokenBuilder = harness.getClient().newScanTokenBuilder(table) + .batchSizeBytes(1) + .setFaultTolerant(true) + .setProjectedColumnIndexes(Lists.newArrayList(0)); + + List tokens = tokenBuilder.build(); + assertTrue(tokens.size() == TABLET_COUNT); + + class TabletScannerTask implements Callable { + private KuduScanToken token; + private boolean enableFaultInjection; + + public TabletScannerTask(KuduScanToken token, boolean enableFaultInjection) { + this.token = token; + this.enableFaultInjection = enableFaultInjection; + } + + @Override + public Integer call() { + int rowCount = 0; + KuduScanner scanner; + try { + scanner = this.token.intoScanner(harness.getClient()); + } catch (IOException e) { + LOG.error("Generate KuduScanner error, {}", e.getMessage()); + e.printStackTrace(); + return -1; + } + try { + int previousRow = Integer.MIN_VALUE; + boolean faultInjected = !this.enableFaultInjection; + int faultInjectionLowBound = (ROW_COUNT / TABLET_COUNT / 2); + boolean firstScanRequest = true; + + long firstScannedMetric = 0; + long firstPropagatedTimestamp = 0; + long lastScannedMetric = 0; + long lastPropagatedTimestamp = 0; + while (scanner.hasMoreRows()) { + RowResultIterator rri = scanner.nextRows(); + while (rri.hasNext()) { + int key = rri.next().getInt(0); + if (previousRow >= key) { + LOG.error("Impossible results, previousKey: {} >= currentKey: {}", + previousRow, key); + return -1; + } + if (!faultInjected && rowCount > faultInjectionLowBound) { + harness.restartTabletServer(scanner.currentTablet()); + faultInjected = true; + } else { + if (firstScanRequest) { + firstScannedMetric = scanner.getResourceMetrics().getMetric(METRIC_NAME); + firstPropagatedTimestamp = harness.getClient().getLastPropagatedTimestamp(); + firstScanRequest = false; + } + lastScannedMetric = scanner.getResourceMetrics().getMetric(METRIC_NAME); + lastPropagatedTimestamp = harness.getClient().getLastPropagatedTimestamp(); + } + previousRow = key; + rowCount++; + } + } + assertTrue(lastScannedMetric != firstScannedMetric); + assertTrue(lastPropagatedTimestamp > firstPropagatedTimestamp); + } catch (Exception e) { + LOG.error("Scan error, {}", e.getMessage()); + e.printStackTrace(); + } finally { + try { + scanner.close(); + } catch (KuduException e) { + LOG.warn(e.getMessage()); + e.printStackTrace(); + } + } + return rowCount; + } + } + + int rowCount = 0; + ExecutorService threadPool = Executors.newFixedThreadPool(TABLET_COUNT); + List tabletScannerTasks = new ArrayList<>(); + tabletScannerTasks.add(new TabletScannerTask(tokens.get(0), true)); + for (int i = 1; i < tokens.size(); i++) { + tabletScannerTasks.add(new TabletScannerTask(tokens.get(i), false)); + } + List> results = threadPool.invokeAll(tabletScannerTasks); + threadPool.shutdown(); + assertTrue(threadPool.awaitTermination(100, TimeUnit.SECONDS)); + for (Future result : results) { + try { + rowCount += result.get(); + } catch (Exception e) { + LOG.info(e.getMessage()); + assertTrue(false); + } + } + assertEquals(ROW_COUNT, rowCount); + } + + /** + * Injecting failures (i.e. drop client connection) while scanning, to verify: + * both non-fault tolerant scanner and fault tolerant scanner will continue scan as expected. 
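+   * The connection to the tablet server being scanned is forcefully dropped after the
+   * first batch, and the scan is expected to resume and still return every row exactly once.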
+ * + * @param isFaultTolerant if true use fault-tolerant scanner, otherwise use non-fault-tolerant one + * @throws Exception + */ + @SuppressWarnings("FutureReturnValueIgnored") + void clientFaultInjection(boolean isFaultTolerant) throws KuduException { + KuduScanner scanner = harness.getClient().newScannerBuilder(table) + .setFaultTolerant(isFaultTolerant) + .batchSizeBytes(1) + .build(); + + try { + int rowCount = 0; + int loopCount = 0; + if (scanner.hasMoreRows()) { + loopCount++; + RowResultIterator rri = scanner.nextRows(); + rowCount += rri.getNumRows(); + } + + // Forcefully disconnects the current connection and fails all outstanding RPCs + // in the middle of scanning. + harness.getAsyncClient().newRpcProxy(scanner.currentTablet().getReplicaSelectedServerInfo( + scanner.getReplicaSelection(), /* location= */"")).getConnection().disconnect(); + + while (scanner.hasMoreRows()) { + loopCount++; + RowResultIterator rri = scanner.nextRows(); + rowCount += rri.getNumRows(); + } + + assertTrue(loopCount > TABLET_COUNT); + assertEquals(ROW_COUNT, rowCount); + } finally { + scanner.close(); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAlterTable.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAlterTable.java new file mode 100644 index 0000000000..d1f41c8292 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAlterTable.java @@ -0,0 +1,1285 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInTable; +import static org.apache.kudu.test.ClientTestUtil.scanTableToStrings; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.function.ThrowingRunnable; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnSchema.CompressionAlgorithm; +import org.apache.kudu.ColumnSchema.Encoding; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.util.Pair; + +public class TestAlterTable { + private String tableName; + private KuduClient client; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + tableName = TestKuduClient.class.getName() + "-" + System.currentTimeMillis(); + } + + /** + * Creates a new table with two int columns, c0 and c1. c0 is the primary key. + * The table is hash partitioned on c0 into two buckets, and range partitioned + * with the provided bounds. + */ + private KuduTable createTable(List> bounds) throws KuduException { + return createTable(bounds, null, 2); + } + + /** + * Creates a new table with two int columns, c0 and c1. c0 is the primary key. + * The table is hash partitioned on c0 into two buckets, and range partitioned + * with the provided bounds and the specified owner. + */ + private KuduTable createTable(List> bounds, String owner, + int buckets) + throws KuduException { + // Create initial table with single range partition covering the entire key + // space, and two hash buckets. + ArrayList columns = new ArrayList<>(1); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .build()); + Schema schema = new Schema(columns); + + CreateTableOptions createOptions = + new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("c0")) + .setNumReplicas(1); + if (buckets > 1) { + createOptions = createOptions.addHashPartitions(ImmutableList.of("c0"), buckets); + } + + for (Pair bound : bounds) { + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + lower.addInt("c0", bound.getFirst()); + upper.addInt("c0", bound.getSecond()); + createOptions.addRangePartition(lower, upper); + } + + if (owner != null) { + createOptions.setOwner(owner); + } + + return client.createTable(tableName, schema, createOptions); + } + + /** + * Insert rows into the provided table. The table's columns must be ints, and + * must have a primary key in the first column. 
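+   * Rows are applied with AUTO_FLUSH_BACKGROUND, the session is flushed once at the
+   * end, and any accumulated row errors fail an assertion.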
+ * @param table the table + * @param start the inclusive start key + * @param end the exclusive end key + */ + private void insertRows(KuduTable table, int start, int end) throws KuduException { + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + for (int i = start; i < end; i++) { + Insert insert = table.newInsert(); + for (int idx = 0; idx < table.getSchema().getColumnCount(); idx++) { + insert.getRow().addInt(idx, i); + } + session.apply(insert); + } + session.flush(); + RowError[] rowErrors = session.getPendingErrors().getRowErrors(); + assertEquals(String.format("row errors: %s", Arrays.toString(rowErrors)), 0, rowErrors.length); + } + + private int countRows(KuduTable table) throws KuduException { + KuduScanner scanner = client.newScannerBuilder(table).build(); + int rowCount = 0; + while (scanner.hasMoreRows()) { + RowResultIterator it = scanner.nextRows(); + rowCount += it.getNumRows(); + } + return rowCount; + } + + // This unit test is used to verify the problem KUDU-3483. Without the fix, + // this unit test will throw an out of index exception. + @Test + public void testInsertDataWithChangedSchema() throws Exception { + // Create a table with single partition in order to make all operations + // fall into the same tablet. + KuduTable table = createTable(ImmutableList.of(), null, 1); + final KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + + // Test case with the same table schema. + { + Insert insert = table.newInsert(); + PartialRow row1 = insert.getRow(); + row1.addInt("c0", 101); + row1.addInt("c1", 101); + session.apply(insert); + + Upsert upsert = table.newUpsert(); + PartialRow row2 = upsert.getRow(); + row2.addInt("c0", 102); + row2.addInt("c1", 102); + session.apply(upsert); + List responses = session.flush(); + assertEquals(responses.size(), 2); + + RowError[] rowErrors = session.getPendingErrors().getRowErrors(); + assertEquals(String.format("row errors: %s", + Arrays.toString(rowErrors)), 0, rowErrors.length); + assertEquals(2, countRows(table)); + } + + // Test case with adding columns. + { + // Upsert a row with the old schema. + Upsert upsert1 = table.newUpsert(); + PartialRow row1 = upsert1.getRow(); + row1.addInt("c0", 103); + row1.addInt("c1", 103); + session.apply(upsert1); + + // Add one new column. + client.alterTable(tableName, new AlterTableOptions() + .addColumn("addNonNull", Type.INT32, 100)); + + // Reopen the table with new schema. + table = client.openTable(tableName); + assertEquals(3, table.getSchema().getColumnCount()); + + // Upsert a row with the new schema. + Upsert upsert2 = table.newUpsert(); + PartialRow row2 = upsert2.getRow(); + row2.addInt("c0", 104); + row2.addInt("c1", 104); + row2.addInt("addNonNull", 101); + + session.apply(upsert2); + List responses = session.flush(); + assertEquals(responses.size(), 2); + + RowError[] rowErrors = session.getPendingErrors().getRowErrors(); + assertEquals(String.format("row errors: %s", + Arrays.toString(rowErrors)), 0, rowErrors.length); + + // Read the data. It contains 4 rows. + assertEquals(4, countRows(table)); + } + + // Test case with renamed columns. + { + table = client.openTable(tableName); + // Upsert a row. + Upsert upsert1 = table.newUpsert(); + PartialRow row1 = upsert1.getRow(); + row1.addInt("c0", 105); + row1.addInt("c1", 105); + row1.addInt("addNonNull", 101); + session.apply(upsert1); + + // Rename one column. 
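+      // The upsert buffered above still refers to the old column name, so the flush
+      // below is expected to surface exactly one row error.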
+ client.alterTable(tableName, new AlterTableOptions() + .renameColumn("addNonNull", "newAddNonNull")); + + // Reopen the table with the new schema. + table = client.openTable(tableName); + assertEquals(3, table.getSchema().getColumnCount()); + + // Upsert a row with the new schema. + Upsert upsert2 = table.newUpsert(); + PartialRow row2 = upsert2.getRow(); + row2.addInt("c0", 106); + row2.addInt("c1", 106); + row2.addInt("newAddNonNull", 101); + session.apply(upsert2); + List responses = session.flush(); + assertEquals(responses.size(), 2); + + RowError[] rowErrors = session.getPendingErrors().getRowErrors(); + assertEquals(String.format("row errors: %s", + Arrays.toString(rowErrors)), 1, rowErrors.length); + assertTrue(Arrays.toString(rowErrors) + .contains("Client provided column addNonNull INT32 NOT NULL not present in tablet")); + + // Read the data. It contains 5 rows, one row failed to insert. + assertEquals(5, countRows(table)); + } + + // Test case with drop columns. + { + // Upsert a row. + Upsert upsert1 = table.newUpsert(); + PartialRow row1 = upsert1.getRow(); + row1.addInt("c0", 107); + row1.addInt("c1", 107); + row1.addInt("newAddNonNull", 101); + session.apply(upsert1); + + // Drop one column. + client.alterTable(tableName, new AlterTableOptions() + .dropColumn("newAddNonNull")); + + // Reopen the table with the new schema. + table = client.openTable(tableName); + assertEquals(2, table.getSchema().getColumnCount()); + + // Upsert a row with the new schema. + Upsert upsert2 = table.newUpsert(); + PartialRow row2 = upsert2.getRow(); + row2.addInt("c0", 108); + row2.addInt("c1", 108); + session.apply(upsert2); + List responses = session.flush(); + assertEquals(responses.size(), 2); + + RowError[] rowErrors = session.getPendingErrors().getRowErrors(); + assertEquals(String.format("row errors: %s", + Arrays.toString(rowErrors)), 1, rowErrors.length); + assertTrue(Arrays.toString(rowErrors) + .contains("Client provided column newAddNonNull INT32 NOT NULL not present in tablet")); + + // Read the data. It contains 6 rows, one row failed to insert. + assertEquals(6, countRows(table)); + } + } + + @Test + public void testAlterAddColumns() throws Exception { + KuduTable table = createTable(ImmutableList.of()); + insertRows(table, 0, 100); + assertEquals(100, countRowsInTable(table)); + + client.alterTable(tableName, new AlterTableOptions() + .addColumn("addNonNull", Type.INT32, 100) + .addNullableColumn("addNullable", Type.INT32) + .addNullableColumn("addNullableDef", Type.INT32, 200)); + + // Reopen table for the new schema. 
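+    // openTable() fetches the current schema from the master; the stale handle created
+    // earlier still reflects only the original two columns.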
+ table = client.openTable(tableName); + assertEquals(5, table.getSchema().getColumnCount()); + + // Add a row with addNullableDef=null + final KuduSession session = client.newSession(); + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt("c0", 101); + row.addInt("c1", 101); + row.addInt("addNonNull", 101); + row.addInt("addNullable", 101); + row.setNull("addNullableDef"); + session.apply(insert); + session.flush(); + RowError[] rowErrors = session.getPendingErrors().getRowErrors(); + assertEquals(String.format("row errors: %s", Arrays.toString(rowErrors)), 0, rowErrors.length); + + // Check defaults applied, and that row key=101 + final List actual = scanTableToStrings(table); + final List expected = new ArrayList<>(101); + for (int i = 0; i < 100; i++) { + expected.add(i, String.format("INT32 c0=%d, INT32 c1=%d, INT32 addNonNull=100" + + ", INT32 addNullable=NULL, INT32 addNullableDef=200", i, i)); + } + expected.add("INT32 c0=101, INT32 c1=101, INT32 addNonNull=101" + + ", INT32 addNullable=101, INT32 addNullableDef=NULL"); + Collections.sort(expected); + assertArrayEquals(expected.toArray(new String[0]), actual.toArray(new String[0])); + + NonRecoverableException thrown = + Assert.assertThrows(NonRecoverableException.class, new ThrowingRunnable() { + @Override + public void run() throws Exception { + // Add duplicate column + client.alterTable(tableName, new AlterTableOptions() + .addNullableColumn("addNullable", Type.INT32)); + } + }); + Assert.assertTrue(thrown.getStatus().isAlreadyPresent()); + Assert.assertTrue(thrown.getMessage().contains("The column already exists")); + } + + @Test + public void testAlterModifyColumns() throws Exception { + KuduTable table = createTable(ImmutableList.of()); + insertRows(table, 0, 100); + assertEquals(100, countRowsInTable(table)); + + // Check for expected defaults. + ColumnSchema col = table.getSchema().getColumns().get(1); + assertEquals(CompressionAlgorithm.DEFAULT_COMPRESSION, col.getCompressionAlgorithm()); + assertEquals(Encoding.AUTO_ENCODING, col.getEncoding()); + assertNull(col.getDefaultValue()); + + // Alter the table. + client.alterTable(tableName, new AlterTableOptions() + .changeCompressionAlgorithm(col.getName(), CompressionAlgorithm.SNAPPY) + .changeEncoding(col.getName(), Encoding.RLE) + .changeDefault(col.getName(), 0)); + + // Check for new values. + table = client.openTable(tableName); + col = table.getSchema().getColumns().get(1); + assertEquals(CompressionAlgorithm.SNAPPY, col.getCompressionAlgorithm()); + assertEquals(Encoding.RLE, col.getEncoding()); + assertEquals(0, col.getDefaultValue()); + } + + @Test + public void testRenameKeyColumn() throws Exception { + KuduTable table = createTable(ImmutableList.of()); + insertRows(table, 0, 100); + assertEquals(100, countRowsInTable(table)); + + client.alterTable(tableName, new AlterTableOptions() + .renameColumn("c0", "c0Key")); + + // scanning with the old schema + try { + KuduScanner scanner = client.newScannerBuilder(table) + .setProjectedColumnNames(Lists.newArrayList("c0", "c1")).build(); + while (scanner.hasMoreRows()) { + scanner.nextRows(); + } + fail(); + } catch (KuduException e) { + assertTrue(e.getStatus().isInvalidArgument()); + assertTrue(e.getStatus().getMessage().contains( + "Some columns are not present in the current schema: c0")); + } + + // Reopen table for the new schema. 
+ table = client.openTable(tableName); + assertEquals("c0Key", table.getSchema().getPrimaryKeyColumns().get(0).getName()); + assertEquals(2, table.getSchema().getColumnCount()); + + // Add a row + KuduSession session = client.newSession(); + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt("c0Key", 101); + row.addInt("c1", 101); + session.apply(insert); + session.flush(); + RowError[] rowErrors = session.getPendingErrors().getRowErrors(); + assertEquals(String.format("row errors: %s", Arrays.toString(rowErrors)), 0, rowErrors.length); + + KuduScanner scanner = client.newScannerBuilder(table) + .setProjectedColumnNames(Lists.newArrayList("c0Key", "c1")).build(); + int rowCount = 0; + while (scanner.hasMoreRows()) { + RowResultIterator it = scanner.nextRows(); + while (it.hasNext()) { + RowResult rr = it.next(); + assertEquals(rr.getInt(0), rr.getInt(1)); + ++rowCount; + } + } + assertEquals(101, rowCount); + } + + @Test + public void testAlterRangePartitioning() throws Exception { + KuduTable table = createTable(ImmutableList.of()); + Schema schema = table.getSchema(); + + // Insert some rows, and then drop the partition and ensure that the table is empty. + insertRows(table, 0, 100); + assertEquals(100, countRowsInTable(table)); + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + client.alterTable(tableName, new AlterTableOptions().dropRangePartition(lower, upper)); + assertEquals(0, countRowsInTable(table)); + + // Add new range partition and insert rows. + lower.addInt("c0", 0); + upper.addInt("c0", 100); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(lower, upper)); + insertRows(table, 0, 100); + assertEquals(100, countRowsInTable(table)); + + // Replace the range partition with a different one. + AlterTableOptions options = new AlterTableOptions(); + options.dropRangePartition(lower, upper); + lower.addInt("c0", 50); + upper.addInt("c0", 150); + options.addRangePartition(lower, upper); + client.alterTable(tableName, options); + assertEquals(0, countRowsInTable(table)); + insertRows(table, 50, 125); + assertEquals(75, countRowsInTable(table)); + + // Replace the range partition with the same one. + client.alterTable(tableName, new AlterTableOptions().dropRangePartition(lower, upper) + .addRangePartition(lower, upper)); + assertEquals(0, countRowsInTable(table)); + insertRows(table, 50, 125); + assertEquals(75, countRowsInTable(table)); + + // Alter table partitioning + alter table schema + lower.addInt("c0", 200); + upper.addInt("c0", 300); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(lower, upper) + .renameTable(tableName + "-renamed") + .addNullableColumn("c2", Type.INT32)); + tableName = tableName + "-renamed"; + insertRows(table, 200, 300); + assertEquals(175, countRowsInTable(table)); + assertEquals(3, client.openTable(tableName).getSchema().getColumnCount()); + + // Drop all range partitions + alter table schema. This also serves to test + // specifying range bounds with a subset schema (since a column was + // previously added). 
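+    // Note: 'lower' and 'upper' were created from the two-column schema captured before 'c2' was added, so they exercise the subset-schema path for range bounds.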
+ options = new AlterTableOptions(); + options.dropRangePartition(lower, upper); + lower.addInt("c0", 50); + upper.addInt("c0", 150); + options.dropRangePartition(lower, upper); + options.dropColumn("c2"); + client.alterTable(tableName, options); + assertEquals(0, countRowsInTable(table)); + assertEquals(2, client.openTable(tableName).getSchema().getColumnCount()); + } + + /** + * Test creating and altering a table with range partitions with exclusive + * lower bounds and inclusive upper bounds. + */ + @Test + public void testAlterRangePartitioningExclusiveInclusive() throws Exception { + // Create initial table with single range partition covering (-1, 99]. + ArrayList columns = new ArrayList<>(1); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .build()); + Schema schema = new Schema(columns); + + CreateTableOptions createOptions = + new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("c0")) + .setNumReplicas(1); + + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + lower.addInt("c0", -1); + upper.addInt("c0", 99); + createOptions.addRangePartition(lower, upper, + RangePartitionBound.EXCLUSIVE_BOUND, + RangePartitionBound.INCLUSIVE_BOUND); + + final KuduTable table = client.createTable(tableName, schema, createOptions); + + lower.addInt("c0", 199); + upper.addInt("c0", 299); + client.alterTable(tableName, new AlterTableOptions().addRangePartition( + lower, upper, RangePartitionBound.EXCLUSIVE_BOUND, RangePartitionBound.INCLUSIVE_BOUND)); + + // Insert some rows, and then drop the partition and ensure that the table is empty. + insertRows(table, 0, 100); + insertRows(table, 200, 300); + assertEquals(200, countRowsInTable(table)); + + AlterTableOptions alter = new AlterTableOptions(); + lower.addInt("c0", 0); + upper.addInt("c0", 100); + alter.dropRangePartition(lower, upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + lower.addInt("c0", 199); + upper.addInt("c0", 299); + alter.dropRangePartition(lower, upper, + RangePartitionBound.EXCLUSIVE_BOUND, + RangePartitionBound.INCLUSIVE_BOUND); + client.alterTable(tableName, alter); + + assertEquals(0, countRowsInTable(table)); + } + + @Test + public void testAlterRangePartitioningInvalid() throws KuduException { + // Create initial table with single range partition covering [0, 100). 
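+    // Each invalid alteration below should be rejected by the master and leave the original 100 rows intact.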
+ KuduTable table = createTable(ImmutableList.of(new Pair<>(0, 100))); + Schema schema = table.getSchema(); + insertRows(table, 0, 100); + assertEquals(100, countRowsInTable(table)); + + // ADD [0, 100) <- already present (duplicate) + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + lower.addInt("c0", 0); + upper.addInt("c0", 100); + try { + client.alterTable(tableName, new AlterTableOptions().addRangePartition(lower, upper)); + fail(); + } catch (KuduException e) { + assertTrue(e.getStatus().isAlreadyPresent()); + assertTrue(e.getStatus().getMessage().contains( + "range partition already exists")); + } + assertEquals(100, countRowsInTable(table)); + + // ADD [50, 150) <- illegal (overlap) + lower.addInt("c0", 50); + upper.addInt("c0", 150); + try { + client.alterTable(tableName, new AlterTableOptions().addRangePartition(lower, upper)); + fail(); + } catch (KuduException e) { + assertTrue(e.getStatus().isInvalidArgument()); + assertTrue(e.getStatus().getMessage().contains( + "new range partition conflicts with existing one")); + } + assertEquals(100, countRowsInTable(table)); + + // ADD [-50, 50) <- illegal (overlap) + lower.addInt("c0", -50); + upper.addInt("c0", 50); + try { + client.alterTable(tableName, new AlterTableOptions().addRangePartition(lower, upper)); + fail(); + } catch (KuduException e) { + assertTrue(e.getStatus().isInvalidArgument()); + assertTrue(e.getStatus().getMessage().contains( + "new range partition conflicts with existing one")); + } + assertEquals(100, countRowsInTable(table)); + + // ADD [200, 300) + // ADD [-50, 150) <- illegal (overlap) + lower.addInt("c0", 200); + upper.addInt("c0", 300); + AlterTableOptions options = new AlterTableOptions(); + options.addRangePartition(lower, upper); + lower.addInt("c0", -50); + upper.addInt("c0", 150); + options.addRangePartition(lower, upper); + try { + client.alterTable(tableName, options); + fail(); + } catch (KuduException e) { + assertTrue(e.getStatus().isInvalidArgument()); + assertTrue(e.getStatus().getMessage().contains( + "new range partition conflicts with existing one")); + } + assertEquals(100, countRowsInTable(table)); + + // DROP [, ) + try { + client.alterTable(tableName, + new AlterTableOptions().dropRangePartition(schema.newPartialRow(), + schema.newPartialRow())); + fail(); + } catch (KuduException e) { + assertTrue(e.getStatus().isInvalidArgument()); + assertTrue(e.getStatus().getMessage(), e.getStatus().getMessage().contains( + "no range partition to drop")); + } + assertEquals(100, countRowsInTable(table)); + + // DROP [50, 150) + // RENAME foo + lower.addInt("c0", 50); + upper.addInt("c0", 150); + try { + client.alterTable(tableName, new AlterTableOptions().dropRangePartition(lower, upper) + .renameTable("foo")); + fail(); + } catch (KuduException e) { + assertTrue(e.getStatus().isInvalidArgument()); + assertTrue(e.getStatus().getMessage().contains( + "no range partition to drop")); + } + assertEquals(100, countRowsInTable(table)); + assertFalse(client.tableExists("foo")); + + // DROP [0, 100) + // ADD [100, 200) + // DROP [100, 200) + // ADD [150, 250) + // DROP [0, 10) <- illegal + options = new AlterTableOptions(); + + lower.addInt("c0", 0); + upper.addInt("c0", 100); + options.dropRangePartition(lower, upper); + + lower.addInt("c0", 100); + upper.addInt("c0", 200); + options.addRangePartition(lower, upper); + options.dropRangePartition(lower, upper); + + lower.addInt("c0", 150); + upper.addInt("c0", 250); + options.addRangePartition(lower, upper); + + 
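+    // The final drop below targets [0, 10), which matches no existing or pending range partition, so the whole alter is rejected.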
lower.addInt("c0", 0); + upper.addInt("c0", 10); + options.dropRangePartition(lower, upper); + try { + client.alterTable(tableName, options); + fail(); + } catch (KuduException e) { + assertTrue(e.getStatus().isInvalidArgument()); + assertTrue(e.getStatus().getMessage().contains( + "no range partition to drop")); + } + assertEquals(100, countRowsInTable(table)); + } + + /** + * Test altering a table, adding range partitions with custom hash schema + * per range. + */ + @Test(timeout = 100000) + public void testAlterAddRangeWithCustomHashSchema() throws Exception { + ArrayList columns = new ArrayList<>(2); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .build()); + final Schema schema = new Schema(columns); + + CreateTableOptions createOptions = + new CreateTableOptions() + .setRangePartitionColumns(ImmutableList.of("c0")) + .addHashPartitions(ImmutableList.of("c0"), 2, 0) + .setNumReplicas(1); + + { + // Add range partition with the table-wide hash schema (to be added upon + // creating the new table). + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + createOptions.addRangePartition(lower, upper); + } + + client.createTable(tableName, schema, createOptions); + + // Alter the table: add a range partition with custom hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 200); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 3, 0); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + } + + KuduTable table = client.openTable(tableName); + + // Insert some rows and then drop partitions, ensuring the row count comes + // as expected. + insertRows(table, -100, 100); + assertEquals(200, countRowsInTable(table)); + insertRows(table, 100, 200); + assertEquals(300, countRowsInTable(table)); + + { + AlterTableOptions alter = new AlterTableOptions(); + alter.setWait(true); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + assertEquals(100, countRowsInTable(table)); + } + + { + AlterTableOptions alter = new AlterTableOptions(); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 200); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + assertEquals(0, countRowsInTable(table)); + } + + // Make sure it's possible to delete/drop the table after adding and then + // dropping a range with custom hash schema. + client.deleteTable(tableName); + } + + /** + * Test altering a table, adding unbounded range partitions + * with custom hash schema. 
+ */ + @Test(timeout = 100000) + public void testAlterAddUnboundedRangeWithCustomHashSchema() throws Exception { + ArrayList columns = new ArrayList<>(2); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .build()); + final Schema schema = new Schema(columns); + + CreateTableOptions createOptions = + new CreateTableOptions() + .setRangePartitionColumns(ImmutableList.of("c0")) + .addHashPartitions(ImmutableList.of("c0"), 2, 0) + .setNumReplicas(1); + // Add range partition [-100, 100) with the table-wide hash schema + // (to be added upon creating the new table below). + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + createOptions.addRangePartition(lower, upper); + } + // Add unbounded range partition [100, +inf) with custom hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 100); + PartialRow upper = schema.newPartialRow(); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 5, 0); + createOptions.addRangePartition(range); + } + + client.createTable(tableName, schema, createOptions); + + // Alter the table: add unbounded range partition [-inf, -100) with custom hash schema. + { + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -100); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 3, 0); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + } + + KuduTable table = client.openTable(tableName); + + // Insert some rows and then drop partitions, ensuring the row count comes + // out as expected. 
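+    // Rows land in all three ranges: (-inf, -100) and [100, +inf) with custom hash schemas, and [-100, 100) with the table-wide schema.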
+ insertRows(table, -250, -200); + assertEquals(50, countRowsInTable(table)); + insertRows(table, -200, -50); + assertEquals(200, countRowsInTable(table)); + insertRows(table, -50, 50); + assertEquals(300, countRowsInTable(table)); + insertRows(table, 50, 200); + assertEquals(450, countRowsInTable(table)); + insertRows(table, 200, 250); + assertEquals(500, countRowsInTable(table)); + + { + AlterTableOptions alter = new AlterTableOptions(); + alter.setWait(true); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + assertEquals(300, countRowsInTable(table)); + } + { + AlterTableOptions alter = new AlterTableOptions(); + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -100); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + assertEquals(150, countRowsInTable(table)); + } + { + AlterTableOptions alter = new AlterTableOptions(); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 100); + PartialRow upper = schema.newPartialRow(); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + assertEquals(0, countRowsInTable(table)); + } + + client.deleteTable(tableName); + } + + /** + * Test altering a table, adding range partitions with custom hash schema + * per range and dropping partition in the middle, resulting in non-covered + * ranges between partition with the table-wide and custom hash schemas. + */ + @Test(timeout = 100000) + public void testAlterAddRangeWithCustomHashSchemaNonCoveredRange() throws Exception { + ArrayList columns = new ArrayList<>(2); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .build()); + final Schema schema = new Schema(columns); + + CreateTableOptions createOptions = + new CreateTableOptions() + .setRangePartitionColumns(ImmutableList.of("c0")) + .addHashPartitions(ImmutableList.of("c0"), 2, 0) + .setNumReplicas(1); + + // Add 3 range partitions with the table-wide hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -300); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -200); + createOptions.addRangePartition(lower, upper); + } + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + createOptions.addRangePartition(lower, upper); + } + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 200); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 300); + createOptions.addRangePartition(lower, upper); + } + + client.createTable(tableName, schema, createOptions); + + // Add range partitions with custom hash schemas, interlaced with the + // partitions having the table-wide hash schema. 
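+    // Four ranges are added: [-400, -300), [-200, -100), [100, 200) and [300, 400), with 3, 4, 5 and 6 hash buckets respectively.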
+ { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -400); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -300); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 3, 0); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + } + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -200); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -100); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 4, 0); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + } + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 200); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 5, 0); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + } + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 300); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 400); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 6, 0); + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + } + + KuduTable table = client.openTable(tableName); + + // Insert some rows and then drop partitions, ensuring the row count comes + // as expected. + insertRows(table, -400, 0); + assertEquals(400, countRowsInTable(table)); + + insertRows(table, 0, 400); + assertEquals(800, countRowsInTable(table)); + + // Drop one range with table-wide hash schema in the very middle of the + // covered ranges. 
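+    // After this drop, [-100, 100) becomes a non-covered gap between ranges that use custom hash schemas.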
+ { + AlterTableOptions alter = new AlterTableOptions(); + alter.setWait(true); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + } + assertEquals(600, countRowsInTable(table)); + + { + AlterTableOptions alter = new AlterTableOptions(); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -400); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -300); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + } + assertEquals(500, countRowsInTable(table)); + + { + AlterTableOptions alter = new AlterTableOptions(); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 200); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + } + assertEquals(400, countRowsInTable(table)); + + { + AlterTableOptions alter = new AlterTableOptions(); + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -200); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", -100); + alter.dropRangePartition(lower, upper); + client.alterTable(tableName, alter); + } + assertEquals(300, countRowsInTable(table)); + + // Make sure it's possible to delete/drop the table after adding and then + // dropping a range with custom hash schema. + client.deleteTable(tableName); + } + + @Test(timeout = 100000) + @KuduTestHarness.MasterServerConfig(flags = { + "--enable_per_range_hash_schemas=false", + }) + public void testAlterTryAddRangeWithCustomHashSchema() throws Exception { + ArrayList columns = new ArrayList<>(2); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .build()); + final Schema schema = new Schema(columns); + + CreateTableOptions createOptions = + new CreateTableOptions() + .setRangePartitionColumns(ImmutableList.of("c0")) + .addHashPartitions(ImmutableList.of("c0"), 2, 0) + .setNumReplicas(1); + + client.createTable(tableName, schema, createOptions); + + // Try adding a range partition with custom hash schema when server side + // doesn't support the RANGE_SPECIFIC_HASH_SCHEMA feature. 
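+    // The harness starts the master with --enable_per_range_hash_schemas=false, so the alter is expected to fail with an 'unsupported feature flags' remote error.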
+ { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 0); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 3, 0); + try { + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + fail("shouldn't be able to add a range with custom hash schema " + + "in a table when server side doesn't support required " + + "RANGE_SPECIFIC_HASH_SCHEMA feature"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isRemoteError()); + assertTrue(errmsg, errmsg.matches( + ".* server sent error unsupported feature flags")); + } + } + } + + @Test(timeout = 100000) + public void testAlterTryAddRangeWithCustomHashSchemaDuplicateColumns() + throws Exception { + ArrayList columns = new ArrayList<>(2); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32) + .nullable(false) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(false) + .key(true) + .build()); + final Schema schema = new Schema(columns); + + CreateTableOptions createOptions = + new CreateTableOptions() + .setRangePartitionColumns(ImmutableList.of("c0")) + .addHashPartitions(ImmutableList.of("c0"), 2, 0) + .addHashPartitions(ImmutableList.of("c1"), 3, 0) + .setNumReplicas(1); + + // Add range partition with table-wide hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 0); + createOptions.addRangePartition(lower, upper); + } + + client.createTable(tableName, schema, createOptions); + + // Try adding a range partition with custom hash schema having multiple + // hash dimensions and conflicting on columns used for hash function: + // different dimensions should not intersect on the set of columns + // used for hashing. + { + PartialRow lower = schema.newPartialRow(); + lower.addInt("c0", 0); + PartialRow upper = schema.newPartialRow(); + upper.addInt("c0", 100); + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("c0"), 3, 0); + range.addHashPartitions(ImmutableList.of("c0"), 3, 0); + try { + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + fail("shouldn't be able to add a range with custom hash schema " + + "having duplicate hash columns across different dimensions"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isInvalidArgument()); + assertTrue(errmsg, errmsg.matches( + "hash bucket schema components must not contain columns in common")); + } + } + } + + @Test + public void testAlterExtraConfigs() throws Exception { + KuduTable table = createTable(ImmutableList.of()); + insertRows(table, 0, 100); + assertEquals(100, countRowsInTable(table)); + + // 1. Check for expected defaults. + table = client.openTable(tableName); + Map extraConfigs = table.getExtraConfig(); + assertFalse(extraConfigs.containsKey("kudu.table.history_max_age_sec")); + + // 2. 
Alter history max age second to 3600 + Map alterExtraConfigs = new HashMap<>(); + alterExtraConfigs.put("kudu.table.history_max_age_sec", "3600"); + client.alterTable(tableName, new AlterTableOptions().alterExtraConfigs(alterExtraConfigs)); + + table = client.openTable(tableName); + extraConfigs = table.getExtraConfig(); + assertTrue(extraConfigs.containsKey("kudu.table.history_max_age_sec")); + assertEquals("3600", extraConfigs.get("kudu.table.history_max_age_sec")); + + // 3. Alter history max age second to 7200 + alterExtraConfigs = new HashMap<>(); + alterExtraConfigs.put("kudu.table.history_max_age_sec", "7200"); + client.alterTable(tableName, new AlterTableOptions().alterExtraConfigs(alterExtraConfigs)); + + table = client.openTable(tableName); + extraConfigs = table.getExtraConfig(); + assertTrue(extraConfigs.containsKey("kudu.table.history_max_age_sec")); + assertEquals("7200", extraConfigs.get("kudu.table.history_max_age_sec")); + + // 4. Reset history max age second to default + alterExtraConfigs = new HashMap<>(); + alterExtraConfigs.put("kudu.table.history_max_age_sec", ""); + client.alterTable(tableName, new AlterTableOptions().alterExtraConfigs(alterExtraConfigs)); + + table = client.openTable(tableName); + assertTrue(table.getExtraConfig().isEmpty()); + } + + @Test + @KuduTestHarness.MasterServerConfig(flags = { "--max_num_columns=10" }) + public void testAlterExceedsColumnLimit() throws Exception { + ArrayList columns = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + columns.add(new ColumnSchema.ColumnSchemaBuilder(Integer.toString(i), Type.INT32) + .key(i == 0) + .build()); + } + Schema schema = new Schema(columns); + CreateTableOptions createOptions = + new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("0")); + client.createTable(tableName, schema, createOptions); + + NonRecoverableException thrown = + Assert.assertThrows(NonRecoverableException.class, new ThrowingRunnable() { + @Override + public void run() throws Exception { + client.alterTable(tableName, + new AlterTableOptions().addNullableColumn("11", Type.INT32)); + } + }); + Assert.assertTrue(thrown.getStatus().isInvalidArgument()); + Assert.assertTrue(thrown.getMessage() + .contains("number of columns 11 is greater than the permitted maximum 10")); + } + + @Test + public void testAlterChangeOwner() throws Exception { + String originalOwner = "alice"; + KuduTable table = createTable(ImmutableList.of(), originalOwner, 2); + assertEquals(originalOwner, table.getOwner()); + + String newOwner = "bob"; + client.alterTable(table.getName(), new AlterTableOptions().setOwner(newOwner)); + table = client.openTable(table.getName()); + assertEquals(newOwner, table.getOwner()); + } + + @Test + public void testAlterChangeComment() throws Exception { + String originalComment = "original comment"; + ArrayList columns = new ArrayList<>(1); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("val", Type.INT32).build()); + Schema schema = new Schema(columns); + CreateTableOptions createOptions = new CreateTableOptions() + .setRangePartitionColumns(ImmutableList.of("key")) + .setComment(originalComment); + KuduTable table = client.createTable(tableName, schema, createOptions); + assertEquals(originalComment, table.getComment()); + + String newComment = "new comment"; + client.alterTable(table.getName(), new AlterTableOptions().setComment(newComment)); + table = client.openTable(table.getName()); + assertEquals(newComment, 
table.getComment()); + } + + @Test + public void testAlterAddAndRemoveImmutableAttribute() throws Exception { + KuduTable table = createTable(ImmutableList.of()); + insertRows(table, 0, 100); + assertEquals(100, countRowsInTable(table)); + + client.alterTable(tableName, new AlterTableOptions() + .changeImmutable("c1", true)); + table = client.openTable(table.getName()); + assertTrue(table.getSchema().getColumn("c1").isImmutable()); + + insertRows(table, 100, 200); + assertEquals(200, countRowsInTable(table)); + + client.alterTable(tableName, new AlterTableOptions() + .changeImmutable("c1", false)); + table = client.openTable(table.getName()); + assertFalse(table.getSchema().getColumn("c1").isImmutable()); + + insertRows(table, 200, 300); + assertEquals(300, countRowsInTable(table)); + + final ColumnSchema immu_col = new ColumnSchema.ColumnSchemaBuilder("immu_col", Type.INT32) + .nullable(true).immutable(true).build(); + client.alterTable(tableName, new AlterTableOptions().addColumn(immu_col)); + table = client.openTable(table.getName()); + assertTrue(table.getSchema().getColumn("immu_col").isImmutable()); + + insertRows(table, 300, 400); + assertEquals(400, countRowsInTable(table)); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduClient.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduClient.java new file mode 100644 index 0000000000..be613a4390 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduClient.java @@ -0,0 +1,344 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static junit.framework.TestCase.assertFalse; +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import com.google.common.base.Splitter; +import com.google.common.base.Stopwatch; +import com.google.protobuf.ByteString; +import com.stumbleupon.async.Deferred; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.consensus.Metadata; +import org.apache.kudu.master.Master; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.ProtobufUtils; + +public class TestAsyncKuduClient { + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + @Test(timeout = 100000) + public void testDisconnect() throws Exception { + // Set to 1 to always test disconnecting the right server. + CreateTableOptions options = getBasicCreateTableOptions().setNumReplicas(1); + KuduTable table = client.createTable( + "testDisconnect-" + System.currentTimeMillis(), + basicSchema, + options); + + // Test that we can reconnect to a TS after a disconnection. + // 1. Warm up the cache. + assertEquals(0, countRowsInScan(client.newScannerBuilder(table).build())); + + // 2. Disconnect the client. + disconnectAndWait(); + + // 3. Count again, it will trigger a re-connection and we should not hang or fail to scan. + assertEquals(0, countRowsInScan(client.newScannerBuilder(table).build())); + + // Test that we can reconnect to a TS while scanning. + // 1. Insert enough rows to have to call next() multiple times. + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + int rowCount = 200; + for (int i = 0; i < rowCount; i++) { + session.apply(createBasicSchemaInsert(table, i)); + } + session.flush(); + + // 2. Start a scanner with a small max num bytes. + AsyncKuduScanner scanner = asyncClient.newScannerBuilder(table) + .batchSizeBytes(1) + .build(); + Deferred rri = scanner.nextRows(); + // 3. Register the number of rows we get back. We have no control over how many rows are + // returned. When this test was written we were getting 100 rows back. + int numRows = rri.join(DEFAULT_SLEEP).getNumRows(); + assertNotEquals("The TS sent all the rows back, we can't properly test disconnection", + rowCount, numRows); + + // 4. Disconnect the client. + disconnectAndWait(); + + // 5. Make sure that we can continue scanning and that we get the remaining rows back. 
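+    // The scanner resumes on the re-established connection, so only the rows not yet returned are counted here.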
+ assertEquals(rowCount - numRows, countRowsInScan(scanner)); + } + + @SuppressWarnings("FutureReturnValueIgnored") + private void disconnectAndWait() throws InterruptedException { + for (Connection c : asyncClient.getConnectionListCopy()) { + c.disconnect(); + } + Stopwatch sw = Stopwatch.createStarted(); + boolean disconnected = false; + while (sw.elapsed(TimeUnit.MILLISECONDS) < DEFAULT_SLEEP) { + boolean sleep = false; + if (!asyncClient.getConnectionListCopy().isEmpty()) { + for (Connection c : asyncClient.getConnectionListCopy()) { + if (!c.isTerminated()) { + sleep = true; + break; + } + } + + } + if (sleep) { + Thread.sleep(50); + } else { + disconnected = true; + break; + } + } + assertTrue(disconnected); + } + + @Test + public void testBadHostnames() throws Exception { + String badHostname = "some-unknown-host-hopefully"; + final int requestBatchSize = 10; + + // Test that a bad hostname for the master makes us error out quickly. + try (AsyncKuduClient invalidClient = + new AsyncKuduClient.AsyncKuduClientBuilder(badHostname).build()) { + try { + invalidClient.listTabletServers().join(1000); + fail("This should have failed quickly"); + } catch (Exception ex) { + assertTrue(ex instanceof NonRecoverableException); + assertTrue(ex.getMessage().contains(badHostname)); + } + } + + List tabletLocations = new ArrayList<>(); + List tsInfos = new ArrayList<>(); + + // Builder three bad locations. + Master.TabletLocationsPB.Builder tabletPb = Master.TabletLocationsPB.newBuilder(); + for (int i = 0; i < 3; i++) { + Common.PartitionPB.Builder partition = Common.PartitionPB.newBuilder(); + partition.setPartitionKeyStart(ByteString.copyFrom("a" + i, UTF_8.name())); + partition.setPartitionKeyEnd(ByteString.copyFrom("b" + i, UTF_8.name())); + tabletPb.setPartition(partition); + tabletPb.setTabletId(ByteString.copyFromUtf8("some id " + i)); + tabletPb.addInternedReplicas(ProtobufUtils.getFakeTabletInternedReplicaPB( + i, Metadata.RaftPeerPB.Role.FOLLOWER)); + tabletLocations.add(tabletPb.build()); + tsInfos.add(ProtobufUtils.getFakeTSInfoPB("uuid",badHostname + i, i).build()); + } + + // Test that a tablet full of unreachable replicas won't make us retry. + try { + KuduTable badTable = new KuduTable(asyncClient, "Invalid table name", + "Invalid table ID", null, null, 3, null, null, null); + asyncClient.discoverTablets(badTable, null, requestBatchSize, + tabletLocations, tsInfos, 1000); + fail("This should have failed quickly"); + } catch (NonRecoverableException ex) { + assertTrue(ex.getMessage().contains(badHostname)); + } + } + + @Test + public void testDiscoverTabletOnBadHostname() throws Exception { + int tserverIdx = 1; // select one tserver for testing + final CreateTableOptions options = getBasicCreateTableOptions(); + final KuduTable table = client.createTable( + "testDiscoverTabletOnBadHostname-" + System.currentTimeMillis(), + basicSchema, + options); + // Get the tserver host_port to uuid mapping + List tservers = harness.getTabletServers(); + + // call discoverTablets + List tabletLocations = new ArrayList<>(); + List tsInfos = new ArrayList<>(); + + // Builder three bad locations. 
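+    // Three tablet locations are faked from the live tablet servers; the hostname of tserver index 1 is mangled so it cannot be resolved, and discoverTablets() should still succeed.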
+ Master.TabletLocationsPB.Builder tabletPb = Master.TabletLocationsPB.newBuilder(); + for (int i = 0; i < 3; i++) { + Common.PartitionPB.Builder partition = Common.PartitionPB.newBuilder(); + partition.setPartitionKeyStart(ByteString.copyFrom("a" + i, UTF_8.name())); + partition.setPartitionKeyEnd(ByteString.copyFrom("b" + i, UTF_8.name())); + tabletPb.setPartition(partition); + tabletPb.setTabletId(ByteString.copyFromUtf8("some id " + i)); + tabletPb.addInternedReplicas(ProtobufUtils.getFakeTabletInternedReplicaPB( + i, Metadata.RaftPeerPB.Role.FOLLOWER)); + tabletLocations.add(tabletPb.build()); + List hostPort = Splitter.on(':').splitToList(tservers.get(i).toString()); + String tserverHost = hostPort.get(0); + if (i == tserverIdx) { + // simulate IP resolve failure by hacking the hostname + tserverHost = tserverHost + "xxx"; + } + tsInfos.add(ProtobufUtils.getFakeTSInfoPB("tserver", + tserverHost, Integer.parseInt(hostPort.get(1))).build()); + } + try { + asyncClient.discoverTablets(table, new byte[0], 100, + tabletLocations, tsInfos, 1000); + } catch (Exception ex) { + fail("discoverTablets should not complain: " + ex.getMessage()); + } + } + + @Test + public void testNoLeader() throws Exception { + final int requestBatchSize = 10; + final CreateTableOptions options = getBasicCreateTableOptions(); + final KuduTable table = client.createTable( + "testNoLeader-" + System.currentTimeMillis(), + basicSchema, + options); + + // Lookup the current locations so that we can pass some valid information to discoverTablets. + final List tablets = asyncClient + .locateTable(table, null, null, requestBatchSize, DEFAULT_SLEEP) + .join(DEFAULT_SLEEP); + final LocatedTablet tablet = tablets.get(0); + final LocatedTablet.Replica leader = tablet.getLeaderReplica(); + + // Fake a master lookup that only returns one follower for the tablet. + final List tabletLocations = new ArrayList<>(); + final List tsInfos = new ArrayList<>(); + Master.TabletLocationsPB.Builder tabletPb = Master.TabletLocationsPB.newBuilder(); + tabletPb.setPartition(ProtobufUtils.getFakePartitionPB()); + tabletPb.setTabletId(ByteString.copyFrom(tablet.getTabletId())); + tabletPb.addInternedReplicas(ProtobufUtils.getFakeTabletInternedReplicaPB( + 0, Metadata.RaftPeerPB.Role.FOLLOWER)); + tabletLocations.add(tabletPb.build()); + tsInfos.add(ProtobufUtils.getFakeTSInfoPB( + "master", leader.getRpcHost(), leader.getRpcPort()).build()); + try { + asyncClient.discoverTablets(table, new byte[0], requestBatchSize, + tabletLocations, tsInfos, 1000); + fail("discoverTablets should throw an exception if there's no leader"); + } catch (NoLeaderFoundException ex) { + // Expected. + } + } + + @Test + public void testConnectionRefused() throws Exception { + CreateTableOptions options = getBasicCreateTableOptions(); + KuduTable table = client.createTable( + "testConnectionRefused-" + System.currentTimeMillis(), + basicSchema, + options); + + // Warm up the caches. + assertEquals(0, countRowsInScan(client.newScannerBuilder(table).build())); + + // Make it impossible to use Kudu. + harness.killAllTabletServers(); + + // Create a scan with a short timeout. + KuduScanner scanner = client.newScannerBuilder(table).scanRequestTimeout(1000).build(); + + // Check it fails. + try { + while (scanner.hasMoreRows()) { + scanner.nextRows(); + fail("The scan should timeout"); + } + } catch (NonRecoverableException ex) { + assertTrue(ex.getStatus().isTimedOut()); + } + + // Try the same thing with an insert. 
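+    // With every tablet server killed, the apply should surface a per-row timeout error instead of hanging.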
+ KuduSession session = client.newSession(); + session.setTimeoutMillis(1000); + OperationResponse response = session.apply(createBasicSchemaInsert(table, 1)); + assertTrue(response.hasRowError()); + assertTrue(response.getRowError().getErrorStatus().isTimedOut()); + } + + + /** + * Test creating a table with out of order primary keys in the table schema . + */ + @Test(timeout = 100000) + public void testCreateTableOutOfOrderPrimaryKeys() throws Exception { + ArrayList columns = new ArrayList<>(6); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key_1", Type.INT8).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column1_i", Type.INT32).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key_2", Type.INT16).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column2_i", Type.INT32).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column3_s", Type.STRING).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column4_b", Type.BOOL).build()); + Schema schema = new Schema(columns); + try { + asyncClient.createTable("testCreateTableOutOfOrderPrimaryKeys-" + System.currentTimeMillis(), + schema, + getBasicCreateTableOptions()).join(); + fail(); + } catch (NonRecoverableException nre) { + assertTrue(nre.getMessage().startsWith("Got out-of-order key column")); + } + } + + /** + * Test supportsIgnoreOperations() when the cluster does support them. + */ + @Test(timeout = 100000) + public void testSupportsIgnoreOperationsTrue() throws Exception { + assertTrue(asyncClient.supportsIgnoreOperations().join()); + } + + /** + * Test supportsIgnoreOperations() when the cluster does not support them. + */ + @Test(timeout = 100000) + @KuduTestHarness.MasterServerConfig(flags = { "--master_support_ignore_operations=false" }) + public void testSupportsIgnoreOperationsFalse() throws Exception { + assertFalse(asyncClient.supportsIgnoreOperations().join()); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduSession.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduSession.java new file mode 100644 index 0000000000..02876c5d51 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduSession.java @@ -0,0 +1,453 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInTable; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.apache.kudu.test.junit.AssertHelpers.assertEventuallyTrue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.List; + +import com.stumbleupon.async.Deferred; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Schema; +import org.apache.kudu.WireProtocol.AppStatusPB; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.junit.AssertHelpers.BooleanExpression; +import org.apache.kudu.tserver.Tserver.TabletServerErrorPB; + +public class TestAsyncKuduSession { + private static final Logger LOG = LoggerFactory.getLogger(TestAsyncKuduSession.class); + + private static final String TABLE_NAME = TestAsyncKuduSession.class.getName(); + private static final Schema SCHEMA = getBasicSchema(); + private static final String INJECTED_TS_ERROR = "injected error for test"; + + private AsyncKuduClient client; + private AsyncKuduSession session; + private KuduTable table; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() throws Exception { + client = harness.getAsyncClient(); + session = client.newSession(); + table = harness.getClient().createTable(TABLE_NAME, SCHEMA, getBasicCreateTableOptions()); + } + + /** + * Test that errors in a background flush are surfaced to clients. + * TODO(wdberkeley): Improve the method of injecting errors into batches, here and below. + * @throws Exception + */ + @Test(timeout = 100000) + public void testBackgroundErrors() throws Exception { + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + session.setFlushInterval(10); + try { + Batch.injectTabletServerErrorAndLatency(makeTabletServerError(), 0); + + OperationResponse resp = session.apply(createInsert(1)).join(DEFAULT_SLEEP); + assertTrue(resp.hasRowError()); + assertTrue( + resp.getRowError().getErrorStatus() + .getMessage().contains(INJECTED_TS_ERROR)); + assertEquals(1, session.countPendingErrors()); + } finally { + Batch.injectTabletServerErrorAndLatency(null, 0); + } + } + + /** + * Regression test for a case where an error in the previous batch could cause the next + * batch to hang in flush(). + */ + @Test(timeout = 100000) + public void testBatchErrorCauseSessionStuck() throws Exception { + session.setFlushMode(AsyncKuduSession.FlushMode.AUTO_FLUSH_BACKGROUND); + session.setFlushInterval(100); + try { + Batch.injectTabletServerErrorAndLatency(makeTabletServerError(), 200); + // 0ms: Insert the first row, which will be the first batch. + Deferred resp1 = session.apply(createInsert(1)); + Thread.sleep(120); + // 100ms: Start to send the first batch. + // 100ms+: The first batch receives a response from the tablet leader, and + // will wait 200s and throw an error. + // 120ms: Insert another row, which will be the second batch. 
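+      // (the 200ms latency injected above keeps the first batch in flight while the second batch is buffered)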
+ Deferred resp2 = session.apply(createInsert(2)); + // 220ms: Start to send the second batch while the first batch is in flight. + // 300ms+: The first batch completes with an error. The second batch is in flight. + { + OperationResponse resp = resp1.join(DEFAULT_SLEEP); + assertTrue(resp.hasRowError()); + assertTrue( + resp.getRowError().getErrorStatus() + .getMessage().contains(INJECTED_TS_ERROR)); + } + // 300ms++: The second batch completes with an error. It does not remain stuck flushing. + { + OperationResponse resp = resp2.join(DEFAULT_SLEEP); + assertTrue(resp.hasRowError()); + assertTrue( + resp.getRowError().getErrorStatus() + .getMessage().contains(INJECTED_TS_ERROR)); + } + assertFalse(session.hasPendingOperations()); + } finally { + Batch.injectTabletServerErrorAndLatency(null, 0); + } + } + + /** + * Regression test for a case when a tablet lookup error causes the original write RPC to hang. + * @throws Exception + */ + @Test(timeout = 100000) + public void testGetTableLocationsErrorCausesStuckSession() throws Exception { + // Make sure tablet locations are cached. + Insert insert = createInsert(1); + session.apply(insert).join(DEFAULT_SLEEP); + RemoteTablet rt = + client.getTableLocationEntry(table.getTableId(), insert.partitionKey()).getTablet(); + String tabletId = rt.getTabletId(); + RpcProxy proxy = client.newRpcProxy(rt.getLeaderServerInfo()); + // Delete the table so subsequent writes fail with 'table not found'. + client.deleteTable(TABLE_NAME).join(); + // Wait until the tablet is deleted on the TS. + while (true) { + ListTabletsRequest req = new ListTabletsRequest(client.getTimer(), 10000); + Deferred d = req.getDeferred(); + proxy.sendRpc(req); + ListTabletsResponse resp = d.join(); + if (!resp.getTabletsList().contains(tabletId)) { + break; + } + Thread.sleep(100); + } + + OperationResponse response = session.apply(createInsert(1)).join(DEFAULT_SLEEP); + assertTrue(response.hasRowError()); + assertTrue(response.getRowError().getErrorStatus().isNotFound()); + } + + /** Regression test for a failure to correctly handle a timeout when flushing a batch. */ + @Test + public void testInsertIntoUnavailableTablet() throws Exception { + harness.killAllTabletServers(); + session.setTimeoutMillis(1); + // Test AUTO_FLUSH_SYNC (the default). + OperationResponse response = session.apply(createInsert(1)).join(); + assertTrue(response.hasRowError()); + LOG.debug("response error: {}", response.getRowError()); + assertTrue(response.getRowError().getErrorStatus().isTimedOut()); + + // Test MANUAL_FLUSH. + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + Insert insert = createInsert(1); + session.apply(insert); + List responses = session.flush().join(); + assertEquals(1, responses.size()); + response = responses.get(0); + assertTrue(response.hasRowError()); + LOG.debug("response error: {}", response.getRowError()); + assertTrue(response.getRowError().getErrorStatus().isTimedOut()); + } + + /** + * Regression test for a bug in which, when a tablet client is disconnected + * and we reconnect, we were previously leaking the old RpcProxy + * object in the client2tablets map. + */ + @Test(timeout = 100000) + public void testRestartBetweenWrites() throws Exception { + // Create a non-replicated table for this test, so that + // we're sure when we reconnect to the leader after restarting + // the tablet servers, it's definitely the same leader we wrote + // to before. 
+ KuduTable nonReplicatedTable = harness.getClient().createTable( + "non-replicated", + SCHEMA, + getBasicCreateTableOptions().setNumReplicas(1)); + + // Write before doing any restarts to establish a connection. + session.setTimeoutMillis(30000); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + session.apply(createBasicSchemaInsert(nonReplicatedTable, 1)).join(); + + final int numClientsBefore = client.getConnectionListCopy().size(); + + // Restart all the tablet servers. + harness.killAllTabletServers(); + harness.startAllTabletServers(); + + // Perform another write, which will require reconnecting to the same + // tablet server that we wrote to above. + session.apply(createBasicSchemaInsert(nonReplicatedTable, 2)).join(); + + // We should not have leaked an entry in the client2tablets map. + int numClientsAfter = client.getConnectionListCopy().size(); + assertEquals(numClientsBefore, numClientsAfter); + } + + /** + * Regression test for KUDU-232, where, in AUTO_FLUSH_BACKGROUND mode, a call to `flush` could + * throw instead of blocking on in-flight ops that are doing tablet lookups. + */ + @Test(timeout = 100000) + public void testKUDU232() throws Exception { + session.setFlushMode(AsyncKuduSession.FlushMode.AUTO_FLUSH_BACKGROUND); + + // Set the flush interval high enough that the operation won't flush in the background before + // the call to `flush`. + session.setFlushInterval(DEFAULT_SLEEP + 1000); + session.apply(createInsert(0)); + + // `flush` should not throw and should block until the row has been flushed. Ergo, the row + // should now be readable server-side by this client. + session.flush().join(DEFAULT_SLEEP); + assertEquals(1, countRowsInTable(table)); + } + + /** + * Test that changing the flush mode while ops are in flight results in an error. + * @throws Exception + */ + @Test(timeout = 100000) + public void testChangingFlushModeWithOpsInFlightIsAnError() throws Exception { + // Buffer an operation in MANUAL_FLUSH mode. + session.setFlushMode(AsyncKuduSession.FlushMode.MANUAL_FLUSH); + session.apply(createInsert(10)); + + try { + // `flush` was never called, so switching the flush mode is an error. + session.setFlushMode(AsyncKuduSession.FlushMode.AUTO_FLUSH_SYNC); + fail(); + } catch (IllegalArgumentException ex) { + // Furthermore, the flush mode should not have changed. + assertTrue(ex.getMessage().contains("Cannot change flush mode when writes are buffered")); + assertEquals(session.getFlushMode(), AsyncKuduSession.FlushMode.MANUAL_FLUSH); + } + } + + /** + * Test the behavior of AUTO_FLUSH_SYNC mode. + * @throws Exception + */ + @Test(timeout = 100000) + public void testAutoFlushSync() throws Exception { + final int kNumOps = 1000; + session.setFlushMode(AsyncKuduSession.FlushMode.AUTO_FLUSH_SYNC); + + // Apply a bunch of operations. There's no buffer to overflow, but the client does need to track + // each op to know if and when it has succeeded. + List> opResponses = new ArrayList<>(); + for (int i = 0; i < kNumOps; i++) { + opResponses.add(session.apply(createInsert(i))); + } + + // Wait on all the ops. After this, all ops should be visible. No explicit flush required. + Deferred.group(opResponses).join(DEFAULT_SLEEP); + assertEquals(kNumOps, countRowsInTable(table)); + } + + /** + * Test the behavior of MANUAL_FLUSH mode. 
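+   * Operations are buffered on the client until flush() is called explicitly, and
+   * overflowing the mutation buffer is reported as an error.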
+ * @throws Exception + */ + @Test(timeout = 100000) + public void testManualFlush() throws Exception { + final int kBufferSizeOps = 10; + session.setFlushMode(AsyncKuduSession.FlushMode.MANUAL_FLUSH); + session.setMutationBufferSpace(kBufferSizeOps); + + // Fill the buffer. + for (int i = 0; i < kBufferSizeOps; i++) { + session.apply(createInsert(i)); + } + + // There was no call to flush, so there should be no rows in the table. + assertEquals(0, countRowsInTable(table)); + + // Attempting to buffer another op is an error. + try { + session.apply(createInsert(kBufferSizeOps + 1)); + fail(); + } catch (KuduException ex) { + assertTrue(ex.getMessage().contains("MANUAL_FLUSH is enabled but the buffer is too big")); + } + + // Now flush. There should be `kBufferSizeOps` rows in the end. + session.flush().join(DEFAULT_SLEEP); + assertEquals(kBufferSizeOps, countRowsInTable(table)); + + // Applying another operation should now succeed. + session.apply(createInsert(kBufferSizeOps + 1)); + } + + /** + * Test the behavior of AUTO_FLUSH_BACKGROUND mode. Because this mode does a lot of work in the + * background, possibly on different threads, it's difficult to test precisely. + * TODO(wdberkeley): Invent better ways of testing AUTO_FLUSH_BACKGROUND edge cases. + * @throws Exception + */ + @Test + public void testAutoFlushBackground() throws Exception { + final int kBufferSizeOps = 10; + session.setFlushMode(AsyncKuduSession.FlushMode.AUTO_FLUSH_BACKGROUND); + session.setMutationBufferSpace(kBufferSizeOps); + + // In AUTO_FLUSH_BACKGROUND mode, the session can accept up to 2x the buffer size of ops before + // it might be out of buffer space (depending on if a background flush finishes). + for (int i = 0; i < 2 * kBufferSizeOps; i++) { + session.apply(createInsert(i)); + } + + // If the client tries to buffer many more operations, it may receive a PleaseThrottleException. + // In this case, if the client simply waits for a flush notification on the Deferred returned + // with the exception, it can continue to buffer operations. + final int kNumOpsMultipler = 10; + for (int i = 2 * kBufferSizeOps; i < kNumOpsMultipler * kBufferSizeOps; i++) { + Insert insert = createInsert(i); + try { + session.apply(insert); + } catch (PleaseThrottleException ex) { + ex.getDeferred().join(DEFAULT_SLEEP); + session.apply(insert); + } + } + + // After a final call to `flush` all operations should be visible to this client. + session.flush().join(DEFAULT_SLEEP); + assertEquals(kNumOpsMultipler * kBufferSizeOps, countRowsInTable(table)); + } + + /** + * Test a tablet going missing or encountering a new tablet while inserting a lot of data. This + * code used to fail in many different ways. + * @throws Exception + */ + @Test(timeout = 100000) + public void testTabletCacheInvalidatedDuringWrites() throws Exception { + final int kNumOps = 10000; + session.setFlushMode(AsyncKuduSession.FlushMode.AUTO_FLUSH_BACKGROUND); + + // Insert 2 * kNumOps rows, but drop the locations cache partway through. 
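+    // Emptying the tablet cache after the first half forces the remaining inserts to redo tablet lookups while background flushes may still be in flight.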
+ for (int i = 0; i < kNumOps; i++) { + Insert insert = createInsert(i); + try { + session.apply(insert); + } catch (PleaseThrottleException ex) { + ex.getDeferred().join(DEFAULT_SLEEP); + session.apply(insert); + } + } + + client.emptyTabletsCacheForTable(table.getTableId()); + + for (int i = kNumOps; i < 2 * kNumOps; i++) { + Insert insert = createInsert(i); + try { + session.apply(insert); + } catch (PleaseThrottleException ex) { + ex.getDeferred().join(DEFAULT_SLEEP); + session.apply(insert); + } + } + + session.flush().join(DEFAULT_SLEEP); + assertEquals(2 * kNumOps, countRowsInTable(table)); + } + + /** + * Test KuduSession supports configuring buffer space by data size. + */ + @Test(timeout = 90000) + public void testFlushBySize() throws Exception { + AsyncKuduSession session = client.newSession(); + final int kBufferSizeOps = 10; + // Considering the existence of buffers, we set a number of operations that is significantly + // larger than the number of buffers to ensure that the buffers are triggered to flush. + final int kNumOps = 100; + // Set a small buffer size so we should flush every time. + session.setMutationBufferSpace(kBufferSizeOps, 1); + // Set a large flush interval so if the flush by size function is not correctly implemented, + // the test will timeout. + session.setFlushInterval(60 * 60 * 1000); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + + for (int i = 0; i < kNumOps; i++) { + // If the client tries to buffer many more operations, it may receive a + // PleaseThrottleException. In this case, if the client simply waits for a flush notification + // on the Deferred returned with the exception, it can continue to buffer operations. + Insert insert = createInsert(i); + try { + session.apply(insert); + } catch (PleaseThrottleException ex) { + ex.getDeferred().join(DEFAULT_SLEEP); + session.apply(insert); + } + } + // There might be pending requests in the cache, but the above operation should not generate any + // errors. + assertEquals(0, session.countPendingErrors()); + // Confirm that we can still make progress. + Insert insert = createInsert(kNumOps); + try { + session.apply(insert); + } catch (PleaseThrottleException ex) { + ex.getDeferred().join(DEFAULT_SLEEP); + session.apply(insert); + } + + for (OperationResponse resp: session.flush().join(DEFAULT_SLEEP)) { + assertFalse(resp.hasRowError()); + } + assertEquals(0, session.close().join(DEFAULT_SLEEP).size()); + } + + // A helper just to make some lines shorter. + private Insert createInsert(int key) { + return createBasicSchemaInsert(table, key); + } + + private TabletServerErrorPB makeTabletServerError() { + return TabletServerErrorPB.newBuilder() + .setCode(TabletServerErrorPB.Code.UNKNOWN_ERROR) + .setStatus(AppStatusPB.newBuilder() + .setCode(AppStatusPB.ErrorCode.UNKNOWN_ERROR) + .setMessage(INJECTED_TS_ERROR) + .build()) + .build(); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthTokenReacquire.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthTokenReacquire.java new file mode 100644 index 0000000000..82b2035852 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthTokenReacquire.java @@ -0,0 +1,257 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Schema; +import org.apache.kudu.security.Token; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.cluster.MiniKuduCluster.MiniKuduClusterBuilder; + +/** + * This test contains scenarios to verify that clients re-acquire tokens upon + * expiration of the current one and automatically retries affected calls. + */ +public class TestAuthTokenReacquire { + private static final Logger LOG = LoggerFactory.getLogger(TestAuthTokenReacquire.class); + + private static final String TABLE_NAME = "TestAuthTokenReacquire-table"; + + // Set a low token timeout. + private static final int TOKEN_TTL_SEC = 1; + private static final int OP_TIMEOUT_MS = 60 * TOKEN_TTL_SEC * 1000; + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + // Inject additional INVALID_AUTHENTICATION_TOKEN responses from both the + // master and tablet servers, even for not-yet-expired tokens. + private static final MiniKuduClusterBuilder clusterBuilder = + KuduTestHarness.getBaseClusterBuilder() + .enableKerberos() + .addMasterServerFlag(String.format("--authn_token_validity_seconds=%d", TOKEN_TTL_SEC)) + .addMasterServerFlag(String.format("--authz_token_validity_seconds=%d", TOKEN_TTL_SEC)) + .addMasterServerFlag("--rpc_inject_invalid_authn_token_ratio=0.5") + .addTabletServerFlag("--rpc_inject_invalid_authn_token_ratio=0.5") + .addTabletServerFlag("--tserver_enforce_access_control=true") + .addTabletServerFlag("--tserver_inject_invalid_authz_token_ratio=0.5"); + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(clusterBuilder); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + @SuppressWarnings("FutureReturnValueIgnored") + private void dropConnections() { + for (Connection c : asyncClient.getConnectionListCopy()) { + c.disconnect(); + } + } + + private void dropConnectionsAndExpireTokens() throws InterruptedException { + // Drop all connections from the client to Kudu servers. 
+ dropConnections(); + // Wait for token expiration. Since we've just dropped all connections, this + // means that we'll need to get a new authn token upon sending the next RPC. + expireTokens(); + } + + private void expireTokens() throws InterruptedException { + // Sleep long enough for the authn/authz tokens to expire. Wait for just + // past the token TTL to avoid making this test flaky, e.g. in case the + // token just misses being considered expired. + Thread.sleep((TOKEN_TTL_SEC + 1) * 1000); + } + + @Test + public void testBasicMasterOperations() throws Exception { + // To ratchet up the intensity a bit, run the scenario by several concurrent threads. + List threads = new ArrayList<>(); + final Map exceptions = + Collections.synchronizedMap(new HashMap<>()); + for (int i = 0; i < 8; ++i) { + final int threadIdx = i; + Thread thread = new Thread(new Runnable() { + @Override + @SuppressWarnings("AssertionFailureIgnored") + public void run() { + final String tableName = "TestAuthTokenReacquire-table-" + threadIdx; + try { + ListTabletServersResponse response = client.listTabletServers(); + assertNotNull(response); + dropConnectionsAndExpireTokens(); + + ListTablesResponse tableList = client.getTablesList(tableName); + assertNotNull(tableList); + assertTrue(tableList.getTablesList().isEmpty()); + dropConnectionsAndExpireTokens(); + + try { + client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + } catch (KuduException ex) { + // Swallow "table already exists" exceptions. This might happen if the thread sends + // a CreateTable request, the master receives it, but the connection is dropped by + // another thread before the client receives a success response, and then the client + // retries. + // TODO(KUDU-1537): Remove this workaround when table creation is exactly-once. + Status exStatus = ex.getStatus(); + if (!exStatus.isAlreadyPresent() && !exStatus.isServiceUnavailable()) { + throw ex; + } + } + dropConnectionsAndExpireTokens(); + + KuduTable table = client.openTable(tableName); + assertEquals(basicSchema.getColumnCount(), table.getSchema().getColumnCount()); + dropConnectionsAndExpireTokens(); + + try { + client.deleteTable(tableName); + } catch (KuduException ex) { + // See the above comment about table creation. + // The same idea applies to table deletion. + // TODO(KUDU-1537): Remove this workaround when table deletion is exactly-once. 
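+            // A NOT_FOUND status here most likely means an earlier attempt already deleted the
+            // table but its response was lost before this client received it, so the retried
+            // RPC no longer finds the table.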
+ if (!ex.getStatus().isNotFound()) { + throw ex; + } + } + assertFalse(client.tableExists(tableName)); + } catch (Throwable e) { + //noinspection ThrowableResultOfMethodCallIgnored + exceptions.put(threadIdx, e); + } + } + }); + thread.start(); + threads.add(thread); + } + for (Thread thread : threads) { + thread.join(); + } + if (!exceptions.isEmpty()) { + for (Map.Entry e : exceptions.entrySet()) { + LOG.error(String.format("exception in thread %s:", e.getKey()), e.getValue()); + } + fail("test failed: unexpected errors"); + } + } + + private int countRowsInTable(KuduTable table) throws Exception { + AsyncKuduScanner scanner = new AsyncKuduScanner.AsyncKuduScannerBuilder(asyncClient, table) + .scanRequestTimeout(OP_TIMEOUT_MS) + .build(); + return countRowsInScan(scanner); + } + + private void insertRowWithKey(KuduSession session, KuduTable table, int key) throws Exception { + session.apply(createBasicSchemaInsert(table, key)); + session.flush(); + RowErrorsAndOverflowStatus errors = session.getPendingErrors(); + assertFalse(errors.isOverflowed()); + assertEquals(0, session.countPendingErrors()); + } + + private List splitKeyRange(KuduTable table) throws Exception { + // Note: the nulls are for key bounds; we don't really care about them. + return table.getAsyncClient().getTableKeyRanges(table, null, null, null, null, + AsyncKuduClient.FETCH_TABLETS_PER_RANGE_LOOKUP, 1, DEFAULT_SLEEP).join(); + } + + @Test + public void testBasicWorkflow() throws Exception { + final KuduTable table = client.createTable(TABLE_NAME, basicSchema, + getBasicCreateTableOptions()); + final String tableId = table.getTableId(); + int key = 0; + + // Drop all connections so the first write needs to reconnect with a new authn token. + // CHECKSTYLE:OFF + Token.SignedTokenPB originalToken = asyncClient.securityContext.getAuthenticationToken(); + // CHECKSTYLE:ON + dropConnectionsAndExpireTokens(); + KuduSession session = client.newSession(); + session.setTimeoutMillis(OP_TIMEOUT_MS); + insertRowWithKey(session, table, ++key); + + // Verify that we got a different authn token. + assertFalse(asyncClient.securityContext.getAuthenticationToken().equals(originalToken)); + + // Now wait for the authz token to expire and do a write. + originalToken = asyncClient.getAuthzToken(tableId); + expireTokens(); + insertRowWithKey(session, table, ++key); + + // Verify that we got a different authz token. + assertFalse(asyncClient.getAuthzToken(tableId).equals(originalToken)); + + // Drop all connections so the first scan needs to reconnect with a new authn token. + originalToken = asyncClient.securityContext.getAuthenticationToken(); + dropConnectionsAndExpireTokens(); + KuduTable scanTable = client.openTable(TABLE_NAME); + assertEquals(key, countRowsInTable(scanTable)); + assertFalse(asyncClient.securityContext.getAuthenticationToken().equals(originalToken)); + + // Now wait for the authz token to expire and do a scan. + originalToken = asyncClient.getAuthzToken(tableId); + expireTokens(); + assertEquals(key, countRowsInTable(scanTable)); + assertFalse(asyncClient.getAuthzToken(tableId).equals(originalToken)); + + // Now wait for the authz token to expire and send a request to split the + // key range. It should succeed and get a new authz token. + originalToken = asyncClient.getAuthzToken(tableId); + expireTokens(); + assertFalse(splitKeyRange(scanTable).isEmpty()); + assertFalse(asyncClient.getAuthzToken(tableId).equals(originalToken)); + + // Force the client to get a new authn token and delete the table. 
+ originalToken = asyncClient.securityContext.getAuthenticationToken(); + dropConnectionsAndExpireTokens(); + client.deleteTable(TABLE_NAME); + assertFalse(client.tableExists(TABLE_NAME)); + assertFalse(asyncClient.securityContext.getAuthenticationToken().equals(originalToken)); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthnTokenReacquireOpen.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthnTokenReacquireOpen.java new file mode 100644 index 0000000000..f7d23b4be6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthnTokenReacquireOpen.java @@ -0,0 +1,124 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.Schema; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.cluster.MiniKuduCluster.MiniKuduClusterBuilder; + +/** + * This test contains a special scenario to make sure the automatic authn token re-acquisition works + * in the case when the client has established a connection to the master using secondary + * credentials. The subtlety is that an authn token cannot be acquired using such a connection, + * so this test verifies that the client opens a new connection using its primary credentials to + * acquire a new authentication token and automatically retries its RPCs with the new authn token. + */ +public class TestAuthnTokenReacquireOpen { + + private static final String TABLE_NAME = "TestAuthnTokenReacquireOpen-table"; + private static final int TOKEN_TTL_SEC = 1; + private static final int OP_TIMEOUT_MS = 60 * TOKEN_TTL_SEC * 1000; + private static final int KEEPALIVE_TIME_MS = 2 * OP_TIMEOUT_MS; + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + private static final MiniKuduClusterBuilder clusterBuilder = + KuduTestHarness.getBaseClusterBuilder() + // We want to have a cluster with a single master. + .numMasterServers(1) + // Set appropriate TTL for authn token and connection keep-alive property, so the client + // could keep an open connection to the master when its authn token is already expired. + // Inject additional INVALID_AUTHENTICATION_TOKEN responses from the tablet server even + // for not-yet-expired tokens for an extra stress on the client. 
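+          // Kerberos provides the primary credentials the client falls back to when it has to
+          // re-acquire an authn token over a fresh connection.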
+ .enableKerberos() + .addMasterServerFlag( + String.format("--authn_token_validity_seconds=%d", TOKEN_TTL_SEC)) + .addMasterServerFlag( + String.format("--rpc_default_keepalive_time_ms=%d", KEEPALIVE_TIME_MS)) + .addTabletServerFlag( + String.format("--rpc_default_keepalive_time_ms=%d", KEEPALIVE_TIME_MS)) + .addTabletServerFlag("--rpc_inject_invalid_authn_token_ratio=0.5"); + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(clusterBuilder); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + @SuppressWarnings("FutureReturnValueIgnored") + private void dropConnections() { + for (Connection c : asyncClient.getConnectionListCopy()) { + c.disconnect(); + } + } + + private static void expireToken() throws InterruptedException { + // Wait for authn token expiration. + Thread.sleep(TOKEN_TTL_SEC * 1000); + } + + @Test + public void test() throws Exception { + // Establish a connection to the cluster, get the list of tablet servers. That would fetch + // an authn token. + ListTabletServersResponse response = client.listTabletServers(); + assertNotNull(response); + dropConnections(); + + // The connection to the master has been dropped. Make a call to the master again so the client + // would create a new connection using authn token. + ListTablesResponse tableList = client.getTablesList(null); + assertNotNull(tableList); + assertTrue(tableList.getTablesList().isEmpty()); + + client.createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); + assertTrue(client.tableExists(TABLE_NAME)); + + expireToken(); + + // Try scan table rows once the authn token has expired. This request goes to corresponding + // tablet server, and a new connection should be negotiated. During connection negotiation, + // the server authenticates the client using authn token, which is expired. + KuduTable scanTable = client.openTable(TABLE_NAME); + AsyncKuduScanner scanner = new AsyncKuduScanner.AsyncKuduScannerBuilder(asyncClient, scanTable) + .scanRequestTimeout(OP_TIMEOUT_MS) + .build(); + assertEquals(0, countRowsInScan(scanner)); + + client.deleteTable(TABLE_NAME); + assertFalse(client.tableExists(TABLE_NAME)); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthzTokenCache.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthzTokenCache.java new file mode 100644 index 0000000000..06d81e7d7b --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestAuthzTokenCache.java @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import com.stumbleupon.async.Deferred; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.security.Token; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.cluster.MiniKuduCluster; + +public class TestAuthzTokenCache { + private static final Logger LOG = LoggerFactory.getLogger(TestAuthzTokenCache.class); + + // This tests basic functionality of the authz token cache (e.g. putting + // things in, getting stuff out). + private static final MiniKuduCluster.MiniKuduClusterBuilder clusterBuilder = + KuduTestHarness.getBaseClusterBuilder() + .enableKerberos(); + + private static final String tableName = "TestAuthzTokenCache-table"; + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + @Rule + public KuduTestHarness harness = new KuduTestHarness(clusterBuilder); + + // Retrieves a new authz token from the master (regardless of whether there is + // already one in the authz token cache). + public void fetchAuthzToken(KuduTable table) throws Exception { + // Send a dummy RPC via the token cache. This will run a scan RPC + // after retrieving a new authz token. + AsyncKuduScanner scanner = new AsyncKuduScanner.AsyncKuduScannerBuilder(asyncClient, table) + .build(); + KuduRpc req = scanner.getOpenRequest(); + Deferred d = req.getDeferred(); + asyncClient.getAuthzTokenCache().retrieveAuthzToken(req, + new InvalidAuthzTokenException(Status.IOError("test failure"))); + assertNotNull(d.join()); + } + + @Test + public void testBasicAuthzTokenCache() throws Exception { + // First, do a sanity check that we get an authz token in the first place + // upon accessing a table. + final KuduTable table = client.createTable(tableName, getBasicSchema(), + getBasicCreateTableOptions()); + final AuthzTokenCache tokenCache = asyncClient.getAuthzTokenCache(); + String tableId = table.getTableId(); + Token.SignedTokenPB originalToken = asyncClient.getAuthzToken(tableId); + assertNotNull(originalToken); + + // Wait a bit so the next token we get will be different. A different token + // will be generated every second by virtue of having a different + // expiration, which is in seconds. + Thread.sleep(1100); + + // Send a dummy RPC via the token cache, sending it only after getting a new + // authz token. + fetchAuthzToken(table); + + // Verify we actually got a new authz token. + assertFalse(asyncClient.getAuthzToken(tableId).equals(originalToken)); + + // Now put the original token directly in the cache. 
+ tokenCache.put(tableId, originalToken); + assertTrue(asyncClient.getAuthzToken(tableId).equals(originalToken)); + } + + @Test + public void testRetrieveAuthzTokensInParallel() throws Exception { + final KuduTable table = client.createTable(tableName, getBasicSchema(), + getBasicCreateTableOptions()); + final String tableId = table.getTableId(); + + class AuthzTokenFetcher implements Callable { + @Override + public Exception call() { + try { + fetchAuthzToken(table); + } catch (Exception e) { + return e; + } + return null; + } + } + + // Send a bunch of authz token requests in parallel. + final int NUM_THREADS = 30; + ArrayList fetchers = new ArrayList<>(); + for (int i = 0; i < NUM_THREADS; i++) { + fetchers.add(new AuthzTokenFetcher()); + } + int fails = 0; + final ExecutorService pool = Executors.newFixedThreadPool(NUM_THREADS); + List> exceptions = pool.invokeAll(fetchers); + pool.shutdown(); + for (int i = 0; i < NUM_THREADS; i++) { + Exception e = exceptions.get(i).get(); + if (e != null) { + fails++; + e.printStackTrace(); + } + } + assertEquals(0, fails); + // We should have gotten a token with all those retrievals, and sent a + // number of RPCs that was lower than the number of threads. + assertNotNull(asyncClient.getAuthzToken(tableId)); + int numRetrievals = asyncClient.getAuthzTokenCache().numRetrievalsSent(); + LOG.debug(String.format("Sent %d RPCs for %d threads", numRetrievals, NUM_THREADS)); + assertTrue(0 < numRetrievals); + assertTrue(numRetrievals < NUM_THREADS); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBitSet.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBitSet.java new file mode 100644 index 0000000000..186d2b065c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBitSet.java @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.BitSet; + +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.junit.RetryRule; + +public class TestBitSet { + + @Rule + public RetryRule retryRule = new RetryRule(); + + /** + * Test out BitSet-related operations + */ + @Test + public void test() { + int colCount = 1; + BitSet bs = new BitSet(colCount); + bs.set(0); + int size = Bytes.getBitSetSize(colCount); + byte[] result = Bytes.fromBitSet(bs, colCount); + assertEquals(size, result.length); + BitSet newBs = Bytes.toBitSet(result, 0, colCount); + assertTrue(newBs.get(0)); + + colCount = 7; + bs = new BitSet(colCount); + bs.set(0); + bs.set(5); + size = Bytes.getBitSetSize(colCount); + result = Bytes.fromBitSet(bs, colCount); + assertEquals(size, result.length); + newBs = Bytes.toBitSet(result, 0, colCount); + assertTrue(newBs.get(0)); + assertFalse(newBs.get(1)); + assertFalse(newBs.get(2)); + assertFalse(newBs.get(3)); + assertFalse(newBs.get(4)); + assertTrue(newBs.get(5)); + assertFalse(newBs.get(6)); + + colCount = 8; + bs = new BitSet(colCount); + bs.set(0); + bs.set(5); + bs.set(7); + size = Bytes.getBitSetSize(colCount); + result = Bytes.fromBitSet(bs, colCount); + assertEquals(size, result.length); + newBs = Bytes.toBitSet(result, 0, colCount); + assertTrue(newBs.get(0)); + assertFalse(newBs.get(1)); + assertFalse(newBs.get(2)); + assertFalse(newBs.get(3)); + assertFalse(newBs.get(4)); + assertTrue(newBs.get(5)); + assertFalse(newBs.get(6)); + assertTrue(newBs.get(7)); + + colCount = 11; + bs = new BitSet(colCount); + bs.set(0); + bs.set(5); + bs.set(7); + bs.set(9); + size = Bytes.getBitSetSize(colCount); + result = Bytes.fromBitSet(bs, colCount); + assertEquals(size, result.length); + newBs = Bytes.toBitSet(result, 0, colCount); + assertTrue(newBs.get(0)); + assertFalse(newBs.get(1)); + assertFalse(newBs.get(2)); + assertFalse(newBs.get(3)); + assertFalse(newBs.get(4)); + assertTrue(newBs.get(5)); + assertFalse(newBs.get(6)); + assertTrue(newBs.get(7)); + assertFalse(newBs.get(8)); + assertTrue(newBs.get(9)); + assertFalse(newBs.get(10)); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBloomFilter.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBloomFilter.java new file mode 100644 index 0000000000..d1b86f23e6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBloomFilter.java @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.Random; + +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.util.BloomFilter; + +public class TestBloomFilter { + + private int numBytes = 32 * 1024; + private long randomSeed = System.currentTimeMillis(); + private int numKeys = 2000; + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + public void testNumberOfHashes() { + assertEquals(BloomFilter.byCountAndFPRate(10, 0.1).getNumHashes(), 3); + assertEquals(BloomFilter.byCountAndFPRate(100, 0.2).getNumHashes(), 2); + assertEquals(BloomFilter.byCountAndFPRate(1000, 0.05).getNumHashes(), 4); + assertEquals(BloomFilter.byCountAndFPRate(10000, 0.01).getNumHashes(), 6); + assertEquals(BloomFilter.bySizeAndFPRate(10, 0.1).getNumHashes(), 3); + assertEquals(BloomFilter.bySizeAndFPRate(1000, 0.2).getNumHashes(), 2); + assertEquals(BloomFilter.bySizeAndFPRate(100000, 0.05).getNumHashes(), 4); + assertEquals(BloomFilter.bySizeAndFPRate(10000000, 0.01).getNumHashes(), 6); + } + + @Test + public void testIntGenBFBySize() { + final BloomFilter bf = BloomFilter.bySize(numBytes); + // Put integers into bloomfilter by random + Random rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + bf.put(rand.nextInt()); + } + // Reset the rand and check existence of the keys. + rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + assertTrue(bf.mayContain(rand.nextInt())); + } + } + + @Test + public void testIntGenBFByCount() { + final BloomFilter bf = BloomFilter.byCount(numKeys); + // Put integers into bloomfilter by random + Random rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + bf.put(rand.nextInt()); + } + // Reset the rand and check existence of the keys. + rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + assertTrue(bf.mayContain(rand.nextInt())); + } + } + + @Test + public void testBytes() { + final BloomFilter bf = BloomFilter.bySize(numBytes); + // Put byte arrays into bloomfilter by random + Random rand = new Random(randomSeed); + byte[] bytes = new byte[64]; + for (int i = 0; i < numKeys; i++) { + rand.nextBytes(bytes); + bf.put(bytes); + } + // Reset the rand and check existence of the keys. + rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + rand.nextBytes(bytes); + assertTrue(bf.mayContain(bytes)); + } + } + + @Test + public void testBoolean() { + final BloomFilter bf = BloomFilter.bySize(numBytes); + // Put booleans into bloomfilter by random + Random rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + bf.put(rand.nextBoolean()); + } + // Reset the rand and check existence of the keys. + rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + assertTrue(bf.mayContain(rand.nextBoolean())); + } + } + + @Test + public void testShort() { + final BloomFilter bf = BloomFilter.bySize(numBytes); + // Put shorts into bloomfilter by random + Random rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + bf.put((short)rand.nextInt()); + } + // Reset the rand and check existence of the keys. 
+ rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + assertTrue(bf.mayContain((short)rand.nextInt())); + } + } + + @Test + public void testLong() { + final BloomFilter bf = BloomFilter.bySize(numBytes); + // Put longs into bloomfilter by random + Random rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + bf.put(rand.nextLong()); + } + // Reset the rand and check existence of the keys. + rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + assertTrue(bf.mayContain(rand.nextLong())); + } + } + + @Test + public void testFloat() { + final BloomFilter bf = BloomFilter.bySize(numBytes); + // Put floats into bloomfilter by random + Random rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + bf.put(rand.nextFloat()); + } + // Reset the rand and check existence of the keys. + rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + assertTrue(bf.mayContain(rand.nextFloat())); + } + } + + @Test + public void testDouble() { + final BloomFilter bf = BloomFilter.bySize(numBytes); + // Put doubles into bloomfilter by random + Random rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + bf.put(rand.nextDouble()); + } + // Reset the rand and check existence of the keys. + rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + assertTrue(bf.mayContain(rand.nextDouble())); + } + } + + @Test + public void testString() { + final BloomFilter bf = BloomFilter.bySize(numBytes); + // Put strings into bloomfilter by random + Random rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + bf.put(rand.nextInt() + ""); + } + // Reset the rand and check existence of the keys. + rand = new Random(randomSeed); + for (int i = 0; i < numKeys; i++) { + assertTrue(bf.mayContain(rand.nextInt() + "")); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBytes.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBytes.java new file mode 100644 index 0000000000..f17dc933b6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestBytes.java @@ -0,0 +1,156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.MathContext; +import java.math.RoundingMode; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.util.DecimalUtil; + +public class TestBytes { + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + public void test() { + byte[] bytes = new byte[16]; + + // Boolean + Bytes.setUnsignedByte(bytes, (short) 1); + assertTrue(Bytes.getBoolean(bytes)); + Bytes.setUnsignedByte(bytes, (short) 0); + assertFalse(Bytes.getBoolean(bytes)); + + // BYTES + short smallUbyte = 120; + Bytes.setUnsignedByte(bytes, smallUbyte); + assertEquals(smallUbyte, Bytes.getUnsignedByte(bytes)); + short largeUbyte = 250; + Bytes.setUnsignedByte(bytes, largeUbyte); + assertEquals(largeUbyte, Bytes.getUnsignedByte(bytes)); + + // SHORTS + short nshort = -300; + Bytes.setShort(bytes, nshort); + assertEquals(nshort, Bytes.getShort(bytes)); + short pshort = 300; + Bytes.setShort(bytes, pshort); + assertEquals(pshort, Bytes.getShort(bytes)); + int smallUshort = 300; + Bytes.setUnsignedShort(bytes, smallUshort); + assertEquals(smallUshort, Bytes.getUnsignedShort(bytes)); + int largeUshort = 60000; + Bytes.setUnsignedShort(bytes, largeUshort); + assertEquals(largeUshort, Bytes.getUnsignedShort(bytes)); + + // INTS + int nint = -60000; + Bytes.setInt(bytes, nint); + assertEquals(nint, Bytes.getInt(bytes)); + int pint = 60000; + Bytes.setInt(bytes, pint); + assertEquals(pint, Bytes.getInt(bytes)); + long smallUint = 60000; + Bytes.setUnsignedInt(bytes, smallUint); + assertEquals(smallUint, Bytes.getUnsignedInt(bytes)); + long largeUint = 4000000000L; + Bytes.setUnsignedInt(bytes, largeUint); + assertEquals(largeUint, Bytes.getUnsignedInt(bytes)); + + // LONGS + long nlong = -4000000000L; + Bytes.setLong(bytes, nlong); + assertEquals(nlong, Bytes.getLong(bytes)); + long plong = 4000000000L; + Bytes.setLong(bytes, plong); + assertEquals(plong, Bytes.getLong(bytes)); + BigInteger smallUlong = new BigInteger("4000000000"); + Bytes.setUnsignedLong(bytes, smallUlong); + assertEquals(smallUlong, Bytes.getUnsignedLong(bytes)); + BigInteger largeUlong = new BigInteger("10000000000000000000"); + Bytes.setUnsignedLong(bytes, largeUlong); + assertEquals(largeUlong, Bytes.getUnsignedLong(bytes)); + + // FLOAT + float floatVal = 123.456f; + Bytes.setFloat(bytes, floatVal); + assertEquals(floatVal, Bytes.getFloat(bytes), 0.001); + + // DOUBLE + double doubleVal = 123.456; + Bytes.setDouble(bytes, doubleVal); + assertEquals(doubleVal, Bytes.getDouble(bytes), 0.001); + + // DECIMAL (32 bits) + BigDecimal smallDecimal = new BigDecimal(BigInteger.valueOf(123456789), 0, + new MathContext(DecimalUtil.MAX_DECIMAL32_PRECISION, RoundingMode.UNNECESSARY)); + Bytes.setBigDecimal(bytes, smallDecimal, DecimalUtil.MAX_DECIMAL32_PRECISION); + assertEquals(smallDecimal, + Bytes.getDecimal(bytes, 0, DecimalUtil.MAX_DECIMAL32_PRECISION, 0)); + BigDecimal negSmallDecimal = new BigDecimal(BigInteger.valueOf(-123456789), 0, + new MathContext(DecimalUtil.MAX_DECIMAL32_PRECISION, RoundingMode.UNNECESSARY)); + Bytes.setBigDecimal(bytes, negSmallDecimal, DecimalUtil.MAX_DECIMAL32_PRECISION); + assertEquals(negSmallDecimal, + Bytes.getDecimal(bytes, 0, DecimalUtil.MAX_DECIMAL32_PRECISION, 0)); + + // DECIMAL (64 
bits) + BigDecimal mediumDecimal = new BigDecimal(BigInteger.valueOf(123456789L), 0, + new MathContext(DecimalUtil.MAX_DECIMAL64_PRECISION, RoundingMode.UNNECESSARY)); + Bytes.setBigDecimal(bytes, mediumDecimal, DecimalUtil.MAX_DECIMAL64_PRECISION); + assertEquals(mediumDecimal, + Bytes.getDecimal(bytes, DecimalUtil.MAX_DECIMAL64_PRECISION, 0)); + BigDecimal negMediumDecimal = new BigDecimal(BigInteger.valueOf(-123456789L), 0, + new MathContext(DecimalUtil.MAX_DECIMAL64_PRECISION, RoundingMode.UNNECESSARY)); + Bytes.setBigDecimal(bytes, negMediumDecimal, DecimalUtil.MAX_DECIMAL64_PRECISION); + assertEquals(negMediumDecimal, + Bytes.getDecimal(bytes, DecimalUtil.MAX_DECIMAL64_PRECISION, 0)); + + // DECIMAL (128 bits) + BigDecimal largeDecimal = + new BigDecimal(new java.math.BigInteger("1234567891011121314151617181920212223"), 0, + new MathContext(DecimalUtil.MAX_DECIMAL128_PRECISION, RoundingMode.UNNECESSARY)); + Bytes.setBigDecimal(bytes, largeDecimal, DecimalUtil.MAX_DECIMAL128_PRECISION); + assertEquals(largeDecimal, + Bytes.getDecimal(bytes, DecimalUtil.MAX_DECIMAL128_PRECISION, 0)); + BigDecimal negLargeDecimal = + new BigDecimal(new java.math.BigInteger("-1234567891011121314151617181920212223"), 0, + new MathContext(DecimalUtil.MAX_DECIMAL128_PRECISION, RoundingMode.UNNECESSARY)); + Bytes.setBigDecimal(bytes, negLargeDecimal, DecimalUtil.MAX_DECIMAL128_PRECISION); + assertEquals(negLargeDecimal, + Bytes.getDecimal(bytes, DecimalUtil.MAX_DECIMAL128_PRECISION, 0)); + } + + @Test + public void testHex() { + byte[] bytes = new byte[] { (byte) 0x01, (byte) 0x23, (byte) 0x45, (byte) 0x67, + (byte) 0x89, (byte) 0xAB, (byte) 0xCD, (byte) 0xEF }; + Assert.assertEquals("0x0123456789ABCDEF", Bytes.hex(bytes)); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestClientFailoverSupport.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestClientFailoverSupport.java new file mode 100644 index 0000000000..65a67b85e2 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestClientFailoverSupport.java @@ -0,0 +1,153 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.apache.kudu.test.junit.AssertHelpers.assertEventuallyTrue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.Schema; +import org.apache.kudu.test.CapturingLogAppender; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.junit.AssertHelpers.BooleanExpression; + +public class TestClientFailoverSupport { + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + private CapturingLogAppender cla = new CapturingLogAppender(); + private Closeable claAttach; + + enum MasterFailureType { + RESTART, + KILL + } + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + claAttach = cla.attach(); + } + + @After + public void checkLogs() throws IOException { + claAttach.close(); + String log = cla.getAppendedText(); + assertFalse("Log should not contain Netty internals", + log.contains("socket.nio.AbstractNioSelector")); + } + + private void waitUntilRowCount(final KuduTable table, final int rowCount, long timeoutMs) + throws Exception { + assertEventuallyTrue(String.format("Read count should be %s", rowCount), + new BooleanExpression() { + @Override + public boolean get() throws Exception { + AsyncKuduScanner scanner = asyncClient.newScannerBuilder(table).build(); + int readCount = countRowsInScan(scanner); + return readCount == rowCount; + } + }, timeoutMs); + } + + @Test(timeout = 100000) + public void testRestartLeaderMaster() throws Exception { + doTestMasterFailover(MasterFailureType.RESTART); + } + + @Test(timeout = 100000) + public void testKillLeaderMaster() throws Exception { + doTestMasterFailover(MasterFailureType.KILL); + } + + /** + * Tests that the Java client will appropriately failover when a new master leader is elected. + * + * We inject some failure on the master, based on 'failureType'. Then we force a tablet + * re-election by killing the leader replica. The client then needs to reconnect to the masters + * to find the new location information. + * + * If we can successfully read back the rows written, that shows the client handled the failover + * correctly. + */ + @SuppressWarnings("deprecation") + private void doTestMasterFailover(MasterFailureType failureType) throws Exception { + final String TABLE_NAME = TestClientFailoverSupport.class.getName() + "-" + failureType; + client.createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); + + KuduTable table = client.openTable(TABLE_NAME); + KuduSession session = client.newSession(); + + final int TOTAL_ROWS_TO_INSERT = 10; + + for (int i = 0; i < TOTAL_ROWS_TO_INSERT; i++) { + session.apply(createBasicSchemaInsert(table, i)); + } + + waitUntilRowCount(table, TOTAL_ROWS_TO_INSERT, DEFAULT_SLEEP); + + // Kill or restart the leader master. 
+ switch (failureType) { + case KILL: + harness.killLeaderMasterServer(); + break; + case RESTART: + harness.restartLeaderMaster(); + break; + default: + throw new IllegalArgumentException("Unexpected failure type: " + failureType); + } + + // Kill the tablet server leader. This will force us to go back to the + // master to find the new location. At that point, the client will + // notice that the old leader master is no longer current and fail over + // to the new one. + List tablets = table.getTabletsLocations(DEFAULT_SLEEP); + assertEquals(1, tablets.size()); + harness.killTabletLeader(tablets.get(0)); + + // Insert some more rows. + for (int i = TOTAL_ROWS_TO_INSERT; i < 2 * TOTAL_ROWS_TO_INSERT; i++) { + session.apply(createBasicSchemaInsert(table, i)); + } + waitUntilRowCount(table, 2 * TOTAL_ROWS_TO_INSERT, DEFAULT_SLEEP); + client.deleteTable(TABLE_NAME); + assertFalse(client.tableExists(TABLE_NAME)); + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestColumnRangePredicate.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestColumnRangePredicate.java new file mode 100644 index 0000000000..a8bc1deac0 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestColumnRangePredicate.java @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +import java.math.BigDecimal; +import java.util.List; + +import com.google.common.collect.Lists; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Type; +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.tserver.Tserver; + +public class TestColumnRangePredicate { + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + @SuppressWarnings("deprecation") + public void testRawLists() { + final ColumnSchema col1 = + new ColumnSchema.ColumnSchemaBuilder("col1", Type.INT32).build(); + final ColumnSchema col2 = + new ColumnSchema.ColumnSchemaBuilder("col2", Type.STRING).build(); + + final ColumnSchema col3 = new ColumnSchema.ColumnSchemaBuilder("col3", Type.DECIMAL) + .typeAttributes(new ColumnTypeAttributes.ColumnTypeAttributesBuilder() + .precision(6).scale(2).build()).build(); + + ColumnRangePredicate pred1 = new ColumnRangePredicate(col1); + pred1.setLowerBound(1); + + ColumnRangePredicate pred2 = new ColumnRangePredicate(col1); + pred2.setUpperBound(2); + + ColumnRangePredicate pred3 = new ColumnRangePredicate(col2); + pred3.setLowerBound("aaa"); + pred3.setUpperBound("bbb"); + + ColumnRangePredicate pred4 = new ColumnRangePredicate(col3); + pred4.setLowerBound(BigDecimal.valueOf(12345, 2)); + + List preds = Lists.newArrayList(pred1, pred2, pred3, pred4); + + byte[] rawPreds = ColumnRangePredicate.toByteArray(preds); + + List decodedPreds = null; + try { + decodedPreds = ColumnRangePredicate.fromByteArray(rawPreds); + } catch (IllegalArgumentException e) { + fail("Couldn't decode: " + e.getMessage()); + } + + assertEquals(4, decodedPreds.size()); + + assertEquals(col1.getName(), decodedPreds.get(0).getColumn().getName()); + assertEquals(1, Bytes.getInt(decodedPreds.get(0).getLowerBound().toByteArray())); + assertFalse(decodedPreds.get(0).hasInclusiveUpperBound()); + + assertEquals(col1.getName(), decodedPreds.get(1).getColumn().getName()); + assertEquals(2, + Bytes.getInt(decodedPreds.get(1).getInclusiveUpperBound().toByteArray())); + assertFalse(decodedPreds.get(1).hasLowerBound()); + + assertEquals(col2.getName(), decodedPreds.get(2).getColumn().getName()); + assertEquals("aaa", + Bytes.getString(decodedPreds.get(2).getLowerBound().toByteArray())); + assertEquals("bbb", + Bytes.getString(decodedPreds.get(2).getInclusiveUpperBound().toByteArray())); + + assertEquals(col3.getName(), decodedPreds.get(3).getColumn().getName()); + assertEquals(12345, Bytes.getInt(decodedPreds.get(3).getLowerBound().toByteArray())); + assertFalse(decodedPreds.get(0).hasInclusiveUpperBound()); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestConnectToCluster.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestConnectToCluster.java new file mode 100644 index 0000000000..cdb080ea54 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestConnectToCluster.java @@ -0,0 +1,285 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.consensus.Metadata.RaftPeerPB.Role.FOLLOWER; +import static org.apache.kudu.consensus.Metadata.RaftPeerPB.Role.LEADER; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.util.List; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.stumbleupon.async.Callback; +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.consensus.Metadata; +import org.apache.kudu.master.Master.ConnectToMasterResponsePB; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.KuduTestHarness.MasterServerConfig; +import org.apache.kudu.test.cluster.MiniKuduCluster; + +public class TestConnectToCluster { + + private static final ImmutableList MASTERS = ImmutableList.of( + new HostAndPort("0", 9000), + new HostAndPort("1", 9000), + new HostAndPort("2", 9000)); + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + /** + * Test that the client properly falls back to the old GetMasterRegistration + * RPC when connecting to a master which does not support the new + * ConnectToMaster RPC. + */ + @Test(timeout = 60000) + @MasterServerConfig(flags = { "--master_support_connect_to_master_rpc=0" }) + public void testFallbackConnectRpc() throws Exception { + // Call some method which uses the master. This forces us to connect + // and verifies that the fallback works. + harness.getClient().listTabletServers(); + } + + /** + * Test for KUDU-2200: if a cluster is running multiple masters, but + * the user only specifies one of them in the connection string, + * the resulting exception should clarify their error rather than + * saying that no leader was found. + */ + @Test(timeout = 60000) + public void testConnectToOneOfManyMasters() throws Exception { + int successes = 0; + String[] masterAddrs = harness.getMasterAddressesAsString().split(",", -1); + assertEquals(3, masterAddrs.length); + for (String masterAddr : masterAddrs) { + try (KuduClient c = new KuduClient.KuduClientBuilder(masterAddr).build()) { + // Call some method which uses the master. This forces us to connect. + c.listTabletServers(); + successes++; + } catch (Exception e) { + Assert.assertTrue("unexpected exception: " + e.toString(), + e.toString().matches( + ".*Client configured with 1 master\\(s\\) " + + "\\(.+?\\) but cluster indicates it expects 3 master\\(s\\) " + + "\\(.+?,.+?,.+?\\).*")); + Assert.assertThat(Joiner.on("\n").join(e.getStackTrace()), + CoreMatchers.containsString("testConnectToOneOfManyMasters")); + } + } + + // Typically, one of the connections will have succeeded. However, it's possible + // that 0 succeeded in the case that the masters were slow at electing + // themselves. 
+ Assert.assertTrue(successes <= 1); + } + + + /** + * Unit test which checks that the ConnectToCluster aggregates the + * responses from the different masters properly and returns the + * response from the located leader. + */ + @Test(timeout = 10000) + public void testAggregateResponses() throws Exception { + final NonRecoverableException reusableNRE = new NonRecoverableException( + Status.RuntimeError("")); + final RecoverableException reusableRE = new RecoverableException( + Status.RuntimeError("")); + final NoLeaderFoundException retryResponse = + new NoLeaderFoundException(Status.RuntimeError("")); + // We don't test for a particular good response, so as long as we pass something that's not an + // exception to runTest() we're good. + Object successResponse = new Object(); + + // Success cases. + + // Normal case. + runTest( + makeCTMR(LEADER, MASTERS), + makeCTMR(FOLLOWER, MASTERS), + makeCTMR(FOLLOWER, MASTERS), + successResponse); + + // Permutation works too. + runTest( + makeCTMR(FOLLOWER, MASTERS), + makeCTMR(LEADER, MASTERS), + makeCTMR(FOLLOWER, MASTERS), + successResponse); + + // Multiple leaders, that's fine since it might be a TOCTOU situation, or one master + // is confused. Raft handles this if the client then tries to do something that requires a + // replication on the master-side. + runTest( + makeCTMR(LEADER, MASTERS), + makeCTMR(LEADER, MASTERS), + makeCTMR(FOLLOWER, MASTERS), + successResponse); + + // Mixed bag, still works because there's a leader. + runTest( + reusableNRE, + makeCTMR(FOLLOWER, MASTERS), + makeCTMR(LEADER, MASTERS), + successResponse); + + // All unreachable except one leader, still good. + runTest( + reusableNRE, + reusableNRE, + makeCTMR(LEADER, MASTERS), + successResponse); + + // Permutation of the previous. + runTest( + reusableNRE, + makeCTMR(LEADER, MASTERS), + reusableNRE, + successResponse); + + // Client try to connect three masters, but the cluster is configure with only one master. + // If connect to a leader master, success. + runTest( + reusableNRE, + reusableNRE, + makeCTMR(LEADER, ImmutableList.of(MASTERS.get(0))), + successResponse); + + // Retry cases. + + // Just followers means we retry. + runTest( + makeCTMR(FOLLOWER, MASTERS), + makeCTMR(FOLLOWER, MASTERS), + makeCTMR(FOLLOWER, MASTERS), + retryResponse); + + // One NRE but we have responsive masters, retry. + runTest( + makeCTMR(FOLLOWER, MASTERS), + makeCTMR(FOLLOWER, MASTERS), + reusableNRE, + retryResponse); + + // One good master but no leader, retry. + runTest( + reusableNRE, + makeCTMR(FOLLOWER, MASTERS), + reusableNRE, + retryResponse); + + // Different case but same outcome. + runTest( + reusableRE, + reusableNRE, + makeCTMR(FOLLOWER, MASTERS), + retryResponse); + + // All recoverable means retry. + runTest( + reusableRE, + reusableRE, + reusableRE, + retryResponse); + + // Just one recoverable still means retry. + runTest( + reusableRE, + reusableNRE, + reusableNRE, + retryResponse); + + // Client try to connect three masters, but the cluster is configure with only one master. + // If the master hasn't become a leader, retry. + runTest( + reusableNRE, + reusableNRE, + makeCTMR(FOLLOWER, ImmutableList.of(MASTERS.get(0))), + retryResponse); + + // Failure case. + + // Can't recover anything, give up. 
+ runTest( + reusableNRE, + reusableNRE, + reusableNRE, + reusableNRE); + } + + private void runTest(Object response0, + Object response1, + Object response2, + Object expectedResponse) throws Exception { + + // Here we basically do what AsyncKuduClient would do, add all the callbacks and then we also + // add the responses. We then check for the right response. + + ConnectToCluster grrm = new ConnectToCluster(MASTERS); + + Callback cb0 = grrm.callbackForNode(MASTERS.get(0)); + Callback cb1 = grrm.callbackForNode(MASTERS.get(1)); + Callback cb2 = grrm.callbackForNode(MASTERS.get(2)); + + Callback eb0 = grrm.errbackForNode(MASTERS.get(0)); + Callback eb1 = grrm.errbackForNode(MASTERS.get(1)); + Callback eb2 = grrm.errbackForNode(MASTERS.get(2)); + + callTheRightCallback(cb0, eb0, response0); + callTheRightCallback(cb1, eb1, response1); + callTheRightCallback(cb2, eb2, response2); + + try { + grrm.getDeferred().join(); // Don't care about the response. + if ((expectedResponse instanceof Exception)) { + fail("Should not work " + expectedResponse.getClass()); + } + } catch (Exception ex) { + assertEquals(expectedResponse.getClass(), ex.getClass()); + } + } + + // Helper method that determines if the callback or errback should be called. + private static void callTheRightCallback( + Callback cb, + Callback eb, + Object response) throws Exception { + if (response instanceof Exception) { + eb.call((Exception) response); + } else { + cb.call((ConnectToMasterResponsePB) response); + } + } + + // Helper method to make a ConnectToMasterResponsePB. + private static ConnectToMasterResponsePB makeCTMR(Metadata.RaftPeerPB.Role role, + List masters) { + ConnectToMasterResponsePB.Builder b = ConnectToMasterResponsePB.newBuilder(); + b.setRole(role); + for (HostAndPort master : masters) { + b.addMasterAddrs(ProtobufHelper.hostAndPortToPB(master)); + } + return b.build(); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestConnectionCache.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestConnectionCache.java new file mode 100644 index 0000000000..3486cc33ad --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestConnectionCache.java @@ -0,0 +1,119 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; + +import com.stumbleupon.async.Deferred; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.util.NetUtil; + +public class TestConnectionCache { + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Test(timeout = 50000) + @SuppressWarnings("FutureReturnValueIgnored") + public void test() throws Exception { + AsyncKuduClient client = harness.getAsyncClient(); + + // Below we ping the masters directly using RpcProxy, so if they aren't ready to process + // RPCs we'll get an error. Here by listing the tables we make sure this won't happen since + // it won't return until a master leader is found. + client.getTablesList().join(); + + HostAndPort masterHostPort = harness.getMasterServers().get(0); + ServerInfo firstMaster = new ServerInfo("fake-uuid", + masterHostPort, + NetUtil.getInetAddress(masterHostPort.getHost()), + /*location=*/""); + + // 3 masters in the cluster. Connections should have been cached since we forced + // a cluster connection above. + // No tservers have been connected to by the client since we haven't accessed + // any data. + assertEquals(3, client.getConnectionListCopy().size()); + assertFalse(allConnectionsTerminated(client)); + + final RpcProxy proxy = client.newRpcProxy(firstMaster); + + // Disconnect from the server. + proxy.getConnection().disconnect().awaitUninterruptibly(); + waitForConnectionToTerminate(proxy.getConnection()); + assertTrue(proxy.getConnection().isTerminated()); + + // Make sure not all the connections in the connection cache are disconnected yet. Actually, + // only the connection to server '0' should be disconnected. + assertFalse(allConnectionsTerminated(client)); + + // For a new RpcProxy instance, a new connection to the same destination is established. + final RpcProxy newHelper = client.newRpcProxy(firstMaster); + final Connection newConnection = newHelper.getConnection(); + assertNotNull(newConnection); + assertNotSame(proxy.getConnection(), newConnection); + + // The client-->server connection should not be established at this point yet. Wait a little + // before checking the state of the connection: this is to check for the status of the + // underlying connection _after_ the negotiation is run, if a regression happens. The + // negotiation on the underlying connection should be run upon submitting the very first + // RPC via the proxy object, not upon creating RpcProxy instance (see KUDU-1878). + Thread.sleep(500); + assertFalse(newConnection.isReady()); + pingConnection(newHelper); + assertTrue(newConnection.isReady()); + + // Test disconnecting and make sure we cleaned up all the connections. 
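    // Connection.disconnect() only initiates shutdown; termination completes asynchronously,
    // so poll each connection until it reports isTerminated() before checking the cache.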
+ for (Connection c : client.getConnectionListCopy()) { + c.disconnect().awaitUninterruptibly(); + waitForConnectionToTerminate(c); + } + assertTrue(allConnectionsTerminated(client)); + } + + private boolean allConnectionsTerminated(AsyncKuduClient client) { + for (Connection c : client.getConnectionListCopy()) { + if (!c.isTerminated()) { + return false; + } + } + return true; + } + + private void waitForConnectionToTerminate(Connection c) throws InterruptedException { + TimeoutTracker timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(5000); + while (!c.isTerminated() && !timeoutTracker.timedOut()) { + Thread.sleep(250); + } + } + + private void pingConnection(RpcProxy proxy) throws Exception { + PingRequest ping = PingRequest.makeMasterPingRequest(); + Deferred d = ping.getDeferred(); + proxy.sendRpc(ping); + d.join(10000); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestErrorCollector.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestErrorCollector.java new file mode 100644 index 0000000000..0ae9d6d5e9 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestErrorCollector.java @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.junit.RetryRule; + +public class TestErrorCollector { + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + public void testErrorCollector() { + int maxErrors = 10; + ErrorCollector collector = new ErrorCollector(maxErrors); + + // Test with no errors. + int countToTest = 0; + Assert.assertEquals(countToTest, collector.countErrors()); + RowErrorsAndOverflowStatus reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertFalse(reos.isOverflowed()); + Assert.assertEquals(countToTest, reos.getRowErrors().length); + + // Test a single row error. + countToTest = 1; + collector.addError(createRowError(countToTest)); + Assert.assertEquals(countToTest, collector.countErrors()); + reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertFalse(reos.isOverflowed()); + Assert.assertEquals(countToTest, reos.getRowErrors().length); + Assert.assertEquals(countToTest, reos.getRowErrors()[0].getErrorStatus().getPosixCode()); + + // Test filling the collector to the max. 
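    // Exactly maxErrors entries fit without tripping the overflow flag; the overflow cases
    // below add more than that and expect the oldest entries to be dropped.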
+ countToTest = maxErrors; + fillCollectorWith(collector, countToTest); + Assert.assertEquals(countToTest, collector.countErrors()); + reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertFalse(reos.isOverflowed()); + Assert.assertEquals(countToTest, reos.getRowErrors().length); + Assert.assertEquals(countToTest - 1, reos.getRowErrors()[9].getErrorStatus().getPosixCode()); + + // Test overflowing. + countToTest = 95; + fillCollectorWith(collector, countToTest); + Assert.assertEquals(maxErrors, collector.countErrors()); + reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertTrue(reos.isOverflowed()); + Assert.assertEquals(maxErrors, reos.getRowErrors().length); + Assert.assertEquals(countToTest - 1, reos.getRowErrors()[9].getErrorStatus().getPosixCode()); + + // Test overflowing on a newly created collector. + countToTest = 95; + collector = new ErrorCollector(maxErrors); + fillCollectorWith(collector, countToTest); + Assert.assertEquals(maxErrors, collector.countErrors()); + reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertTrue(reos.isOverflowed()); + Assert.assertEquals(maxErrors, reos.getRowErrors().length); + Assert.assertEquals(countToTest - 1, reos.getRowErrors()[9].getErrorStatus().getPosixCode()); + + // Test enlarging non-overflown collector + countToTest = 10; + fillCollectorWith(collector, countToTest); + Assert.assertEquals(maxErrors, collector.countErrors()); + collector.resize(2 * maxErrors); + reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertFalse(reos.isOverflowed()); + Assert.assertEquals(maxErrors, reos.getRowErrors().length); + Assert.assertEquals(countToTest - 1, reos.getRowErrors()[9].getErrorStatus().getPosixCode()); + + // Test enlarging overflown collector + countToTest = 11; + collector = new ErrorCollector(maxErrors); + fillCollectorWith(collector, countToTest); + Assert.assertEquals(maxErrors, collector.countErrors()); + collector.resize(2 * maxErrors); + collector.addError(createRowError(42)); + reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertTrue(reos.isOverflowed()); + Assert.assertEquals(11, reos.getRowErrors().length); + Assert.assertEquals(42, reos.getRowErrors()[10].getErrorStatus().getPosixCode()); + + // Test shrinking without overflow + countToTest = 5; + fillCollectorWith(collector, countToTest); + Assert.assertEquals(countToTest, collector.countErrors()); + collector.resize(maxErrors); + reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertFalse(reos.isOverflowed()); + Assert.assertEquals(countToTest, reos.getRowErrors().length); + Assert.assertEquals(countToTest - 1, reos.getRowErrors()[4].getErrorStatus().getPosixCode()); + + // Test shrinking with overflow + countToTest = 5; + fillCollectorWith(collector, countToTest); + Assert.assertEquals(countToTest, collector.countErrors()); + collector.resize(countToTest - 1); + reos = collector.getErrors(); + Assert.assertEquals(0, collector.countErrors()); + Assert.assertTrue(reos.isOverflowed()); + Assert.assertEquals(countToTest - 1, reos.getRowErrors().length); + // the oldest error is popped + Assert.assertEquals(countToTest - 1, reos.getRowErrors()[3].getErrorStatus().getPosixCode()); + } + + private void fillCollectorWith(ErrorCollector collector, int errorsToAdd) { + for (int i = 0; i < errorsToAdd; i++) { + 
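      // Each synthetic error carries its index as the posix code of its status, letting the
      // assertions above identify exactly which errors were retained.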
collector.addError(createRowError(i)); + } + } + + private RowError createRowError(int id) { + // Use the error status as a way to message pass and so that we can test we're getting the right + // messages on the other end. + return new RowError(Status.NotAuthorized("test", id), null, "test"); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestFlexiblePartitioning.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestFlexiblePartitioning.java new file mode 100644 index 0000000000..f7e8ffdc24 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestFlexiblePartitioning.java @@ -0,0 +1,475 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Predicate; +import com.google.common.base.Predicates; +import com.google.common.collect.ComparisonChain; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.KuduTestHarness; + +public class TestFlexiblePartitioning { + private static final Logger LOG = LoggerFactory.getLogger(TestKuduClient.class); + private String tableName; + + private KuduClient client; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + tableName = TestFlexiblePartitioning.class.getName() + "-" + System.currentTimeMillis(); + } + + private static Schema createSchema() { + ArrayList columns = new ArrayList<>(3); + columns.add(new ColumnSchema.ColumnSchemaBuilder("a", Type.STRING).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("b", Type.STRING).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c", Type.STRING).key(true).build()); + return new Schema(columns); + } + + private static Set rows() throws KuduException { + Set rows = new HashSet<>(); + for (int a = 0; a < 6; a++) { + for (int b = 0; b < 6; b++) { + for (int c = 0; c < 6; c++) { + rows.add(new Row(String.format("%s", a), + String.format("%s", b), + String.format("%s", c))); + } + } + } + return rows; + } + + private void insertRows(KuduTable table, Set rows) throws KuduException { + KuduSession session = 
client.newSession(); + try { + for (Row row : rows) { + Insert insert = table.newInsert(); + PartialRow insertRow = insert.getRow(); + row.fillPartialRow(insertRow); + session.apply(insert); + } + } finally { + session.close(); + } + } + + private Set collectRows(KuduScanner scanner) throws KuduException { + Set rows = new HashSet<>(); + for (RowResult result : scanner) { + rows.add(Row.fromResult(result)); + } + return rows; + } + + /** + * Collects the rows from a set of scan tokens. + * + * @param scanTokens the scan token builder + * @return the rows + */ + private Set collectRows(KuduScanToken.KuduScanTokenBuilder scanTokens) throws Exception { + Set rows = new HashSet<>(); + for (KuduScanToken token : scanTokens.build()) { + LOG.debug("Scanning token: {}", KuduScanToken.stringifySerializedToken(token.serialize(), + client)); + + int existingCount = rows.size(); + Set newRows = collectRows(token.intoScanner(client)); + rows.addAll(newRows); + assertEquals(existingCount + newRows.size(), rows.size()); + } + return rows; + } + + @SuppressWarnings("deprecation") + private void testPartitionSchema(CreateTableOptions tableBuilder) throws Exception { + Schema schema = createSchema(); + + client.createTable(tableName, schema, tableBuilder); + + KuduTable table = client.openTable(tableName); + + Set rows = rows(); + insertRows(table, rows); + + // Full table scan + assertEquals(rows, collectRows(client.newScannerBuilder(table).build())); + + { // Lower bound + Row minRow = new Row("1", "3", "5"); + PartialRow lowerBound = schema.newPartialRow(); + minRow.fillPartialRow(lowerBound); + + Set expected = Sets.filter(rows, minRow.gtePred()::apply); + + KuduScanner scanner = client.newScannerBuilder(table).lowerBound(lowerBound).build(); + Set results = collectRows(scanner); + assertEquals(expected, results); + + KuduScanToken.KuduScanTokenBuilder scanTokens = + client.newScanTokenBuilder(table).lowerBound(lowerBound); + Set tokenResults = collectRows(scanTokens); + assertEquals(expected, tokenResults); + } + + { // Upper bound + Row maxRow = new Row("1", "3", "5"); + PartialRow upperBound = schema.newPartialRow(); + maxRow.fillPartialRow(upperBound); + + Set expected = Sets.filter(rows, maxRow.ltPred()); + + KuduScanner scanner = client.newScannerBuilder(table) + .exclusiveUpperBound(upperBound) + .build(); + Set results = collectRows(scanner); + assertEquals(expected, results); + + KuduScanToken.KuduScanTokenBuilder scanTokens = + client.newScanTokenBuilder(table).exclusiveUpperBound(upperBound); + Set tokenResults = collectRows(scanTokens); + assertEquals(expected, tokenResults); + } + + { // Lower & Upper bounds + Row minRow = new Row("1", "3", "5"); + Row maxRow = new Row("2", "4", ""); + PartialRow lowerBound = schema.newPartialRow(); + minRow.fillPartialRow(lowerBound); + PartialRow upperBound = schema.newPartialRow(); + maxRow.fillPartialRow(upperBound); + + Set expected = + Sets.filter(rows, Predicates.and(minRow.gtePred()::apply, maxRow.ltPred())); + + KuduScanner scanner = client.newScannerBuilder(table) + .lowerBound(lowerBound) + .exclusiveUpperBound(upperBound) + .build(); + Set results = collectRows(scanner); + assertEquals(expected, results); + + KuduScanToken.KuduScanTokenBuilder scanTokens = + client.newScanTokenBuilder(table) + .lowerBound(lowerBound) + .exclusiveUpperBound(upperBound); + Set tokenResults = collectRows(scanTokens); + assertEquals(expected, tokenResults); + } + + List tablets = table.getTabletsLocations(DEFAULT_SLEEP); + + { // Per-tablet scan + Set results = new 
HashSet<>(); + + for (LocatedTablet tablet : tablets) { + KuduScanner scanner = client.newScannerBuilder(table) + .lowerBoundPartitionKeyRaw(tablet.getPartition().getPartitionKeyStart()) + .exclusiveUpperBoundPartitionKeyRaw(tablet.getPartition().getPartitionKeyEnd()) + .build(); + Set tabletResults = collectRows(scanner); + Set intersection = Sets.intersection(results, tabletResults); + assertEquals(new HashSet<>(), intersection); + results.addAll(tabletResults); + } + + assertEquals(rows, results); + } + + { // Per-tablet scan with lower & upper bounds + Row minRow = new Row("1", "3", "5"); + Row maxRow = new Row("2", "4", ""); + PartialRow lowerBound = schema.newPartialRow(); + minRow.fillPartialRow(lowerBound); + PartialRow upperBound = schema.newPartialRow(); + maxRow.fillPartialRow(upperBound); + + Set expected = Sets.filter(rows, + Predicates.and(minRow.gtePred()::apply, maxRow.ltPred())); + Set results = new HashSet<>(); + + for (LocatedTablet tablet : tablets) { + KuduScanner scanner = client.newScannerBuilder(table) + .lowerBound(lowerBound) + .exclusiveUpperBound(upperBound) + .lowerBoundPartitionKeyRaw(tablet.getPartition().getPartitionKeyStart()) + .exclusiveUpperBoundPartitionKeyRaw(tablet.getPartition().getPartitionKeyEnd()) + .build(); + Set tabletResults = collectRows(scanner); + Set intersection = Sets.intersection(results, tabletResults); + assertEquals(new HashSet<>(), intersection); + results.addAll(tabletResults); + } + + assertEquals(expected, results); + } + } + + @Test(timeout = 100000) + public void testHashBucketedTable() throws Exception { + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.addHashPartitions(ImmutableList.of("a"), 3); + tableBuilder.addHashPartitions(ImmutableList.of("b", "c"), 3, 42); + tableBuilder.setRangePartitionColumns(ImmutableList.of()); + testPartitionSchema(tableBuilder); + } + + @Test(timeout = 100000) + public void testNonDefaultRangePartitionedTable() throws Exception { + Schema schema = createSchema(); + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(ImmutableList.of("c", "b")); + + PartialRow split = schema.newPartialRow(); + split.addString("c", "3"); + tableBuilder.addSplitRow(split); + + split = schema.newPartialRow(); + split.addString("c", "3"); + split.addString("b", "3"); + tableBuilder.addSplitRow(split); + + testPartitionSchema(tableBuilder); + } + + @Test(timeout = 100000) + public void testHashBucketedAndRangePartitionedTable() throws Exception { + Schema schema = createSchema(); + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.addHashPartitions(ImmutableList.of("a"), 3); + tableBuilder.addHashPartitions(ImmutableList.of("b", "c"), 3, 42); + tableBuilder.setRangePartitionColumns(ImmutableList.of("c", "b")); + + PartialRow split = schema.newPartialRow(); + split.addString("c", "3"); + tableBuilder.addSplitRow(split); + + split = schema.newPartialRow(); + split.addString("c", "3"); + split.addString("b", "3"); + tableBuilder.addSplitRow(split); + + testPartitionSchema(tableBuilder); + } + + @Test(timeout = 100000) + public void testNonCoveredRangePartitionedTable() throws Exception { + Schema schema = createSchema(); + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(ImmutableList.of("a", "b", "c")); + + // Create a non covered range between (3, 5, 6) and (4, 0, 0) + + PartialRow lowerBoundA = schema.newPartialRow(); + lowerBoundA.addString("a", "0"); + 
lowerBoundA.addString("b", "0"); + lowerBoundA.addString("c", "0"); + PartialRow upperBoundA = schema.newPartialRow(); + upperBoundA.addString("a", "3"); + upperBoundA.addString("b", "5"); + upperBoundA.addString("b", "6"); + tableBuilder.addRangePartition(lowerBoundA, upperBoundA); + + PartialRow lowerBoundB = schema.newPartialRow(); + lowerBoundB.addString("a", "4"); + lowerBoundB.addString("b", "0"); + lowerBoundB.addString("c", "0"); + PartialRow upperBoundB = schema.newPartialRow(); + upperBoundB.addString("a", "5"); + upperBoundB.addString("b", "5"); + upperBoundB.addString("b", "6"); + tableBuilder.addRangePartition(lowerBoundB, upperBoundB); + + testPartitionSchema(tableBuilder); + } + + @Test(timeout = 100000) + public void testHashBucketedAndNonCoveredRangePartitionedTable() throws Exception { + Schema schema = createSchema(); + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(ImmutableList.of("a", "b", "c")); + + // Create a non covered range between (3, 5, 6) and (4, 0, 0) + + PartialRow lowerBoundA = schema.newPartialRow(); + lowerBoundA.addString("a", "0"); + lowerBoundA.addString("b", "0"); + lowerBoundA.addString("c", "0"); + PartialRow upperBoundA = schema.newPartialRow(); + upperBoundA.addString("a", "3"); + upperBoundA.addString("b", "5"); + upperBoundA.addString("c", "6"); + tableBuilder.addRangePartition(lowerBoundA, upperBoundA); + + PartialRow lowerBoundB = schema.newPartialRow(); + lowerBoundB.addString("a", "4"); + lowerBoundB.addString("b", "0"); + lowerBoundB.addString("c", "0"); + PartialRow upperBoundB = schema.newPartialRow(); + upperBoundB.addString("a", "5"); + upperBoundB.addString("b", "5"); + upperBoundB.addString("c", "6"); + tableBuilder.addRangePartition(lowerBoundB, upperBoundB); + + tableBuilder.addHashPartitions(ImmutableList.of("a", "b", "c"), 4); + + testPartitionSchema(tableBuilder); + } + + @Test(timeout = 100000) + public void testSimplePartitionedTable() throws Exception { + Schema schema = createSchema(); + CreateTableOptions tableBuilder = + new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("a", "b", "c")); + + PartialRow split = schema.newPartialRow(); + split.addString("c", "3"); + tableBuilder.addSplitRow(split); + + split = schema.newPartialRow(); + split.addString("c", "3"); + split.addString("b", "3"); + tableBuilder.addSplitRow(split); + + testPartitionSchema(tableBuilder); + } + + @Test(timeout = 100000) + public void testUnpartitionedTable() throws Exception { + CreateTableOptions tableBuilder = + new CreateTableOptions().setRangePartitionColumns(ImmutableList.of()); + testPartitionSchema(tableBuilder); + } + + public static class Row implements Comparable { + private final String valA; + private final String valB; + private final String valC; + + public Row(String a, String b, String c) { + this.valA = a; + this.valB = b; + this.valC = c; + } + + public String getValA() { + return valA; + } + + public String getValB() { + return valB; + } + + public String getValC() { + return valC; + } + + public void fillPartialRow(PartialRow row) { + row.addString("a", valA); + row.addString("b", valB); + row.addString("c", valC); + } + + private static Row fromResult(RowResult result) { + return new Row(result.getString("a"), + result.getString("b"), + result.getString("c")); + } + + public Predicate gtePred() { + return new Predicate() { + @Override + public boolean apply(Row other) { + return other.compareTo(Row.this) >= 0; + } + }; + } + + public Predicate ltPred() { + 
return new Predicate() { + @Override + public boolean apply(Row other) { + return other.compareTo(Row.this) < 0; + } + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof Row)) { + return false; + } + Row row = (Row) o; + return Objects.equals(valA, row.valA) && + Objects.equals(valB, row.valB) && + Objects.equals(valC, row.valC); + } + + @Override + public int hashCode() { + return Objects.hash(valA, valB, valC); + } + + @Override + public int compareTo(Row other) { + return ComparisonChain.start() + .compare(valA, other.valA) + .compare(valB, other.valB) + .compare(valC, other.valC) + .result(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("a", valA) + .add("b", valB) + .add("c", valC) + .toString(); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestHandleTooBusy.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestHandleTooBusy.java new file mode 100644 index 0000000000..b3cb9f5dad --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestHandleTooBusy.java @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; + +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import com.google.common.collect.Lists; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.Schema; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.cluster.MiniKuduCluster.MiniKuduClusterBuilder; + +/** + * Tests which provoke RPC queue overflow errors on the server side + * to ensure that we properly handle them in the client. + */ +public class TestHandleTooBusy { + private static final String TABLE_NAME = "TestHandleTooBusy"; + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + private static final MiniKuduClusterBuilder clusterBuilder = + KuduTestHarness.getBaseClusterBuilder() + // Short queue to provoke overflow. + .addMasterServerFlag("--rpc_service_queue_length=1") + // Low number of service threads, so things stay in the queue. + .addMasterServerFlag("--rpc_num_service_threads=3") + // inject latency so lookups process slowly. 
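      // Together, the short queue, the small thread pool, and the injected latency make
      // concurrent tablet location lookups overflow the master's RPC queue, which is exactly
      // the condition this test needs to provoke.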
+ .addMasterServerFlag("--master_inject_latency_on_tablet_lookups_ms=100"); + + @Rule + public KuduTestHarness harness = new KuduTestHarness(clusterBuilder); + + /** + * Provoke overflows in the master RPC queue while connecting to the master + * and performing location lookups. + */ + @Test(timeout = 60000) + public void testMasterLookupOverflow() throws Exception { + harness.getClient().createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); + ExecutorService exec = Executors.newCachedThreadPool(); + List> futures = Lists.newArrayList(); + for (int thr = 0; thr < 10; thr++) { + futures.add(exec.submit(new Callable() { + @Override + public Void call() throws Exception { + for (int i = 0; i < 5; i++) { + try (KuduClient c = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()) + .build()) { + KuduTable table = c.openTable(TABLE_NAME); + for (int j = 0; j < 5; j++) { + KuduScanToken.KuduScanTokenBuilder scanBuilder = c.newScanTokenBuilder(table); + scanBuilder.build(); + c.asyncClient.emptyTabletsCacheForTable(table.getTableId()); + } + } + } + return null; + } + })); + } + for (Future f : futures) { + f.get(); + } + } + +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestHybridTime.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestHybridTime.java new file mode 100644 index 0000000000..f5bc9a81cb --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestHybridTime.java @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.Type.STRING; +import static org.apache.kudu.client.ExternalConsistencyMode.CLIENT_PROPAGATED; +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.util.HybridTimeUtil.HTTimestampToPhysicalAndLogical; +import static org.apache.kudu.util.HybridTimeUtil.clockTimestampToHTTimestamp; +import static org.apache.kudu.util.HybridTimeUtil.physicalAndLogicalToHTTimestamp; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Ordering; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.cluster.MiniKuduCluster.MiniKuduClusterBuilder; + +/** + * Tests client propagated timestamps. 
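A hybrid time timestamp pairs a physical (microsecond) component with a logical counter; the client propagates the latest timestamp it has observed so that subsequent writes and snapshot scans are ordered with respect to earlier writes.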
All the work for commit wait is done and tested on the + * server-side, so it is not tested here. + */ +public class TestHybridTime { + private static final Logger LOG = LoggerFactory.getLogger(TestHybridTime.class); + + // Generate a unique table name + private static final String TABLE_NAME = + TestHybridTime.class.getName() + "-" + System.currentTimeMillis(); + + private static final Schema schema = getSchema(); + private KuduTable table; + private KuduClient client; + + private static final MiniKuduClusterBuilder clusterBuilder = + KuduTestHarness.getBaseClusterBuilder(); + + @Rule + public KuduTestHarness harness = new KuduTestHarness(clusterBuilder); + + @Before + public void setUp() throws Exception { + client = harness.getClient(); + // Use one tablet because multiple tablets don't work: we could jump from one tablet to another + // which could change the logical clock. + CreateTableOptions builder = + new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("key")); + table = client.createTable(TABLE_NAME, schema, builder); + } + + private static Schema getSchema() { + ArrayList columns = new ArrayList<>(1); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", STRING) + .key(true) + .build()); + return new Schema(columns); + } + + /** + * Write three rows. Increment the timestamp we get back from the first write to put it in the + * future. The remaining writes should force an update to the server's clock and only increment + * the logical value. Check that the client propagates the timestamp correctly by scanning + * back the appropriate rows at the appropriate snapshots. + */ + @Test(timeout = 100000) + public void test() throws Exception { + KuduSession session = client.newSession(); + session.setExternalConsistencyMode(CLIENT_PROPAGATED); + + // Test timestamp propagation with AUTO_FLUSH_SYNC flush mode. + session.setFlushMode(KuduSession.FlushMode.AUTO_FLUSH_SYNC); + List logicalValues = new ArrayList<>(); + + // Perform one write so we receive a timestamp from the server and can use it to propagate a + // modified timestamp back to the server. Following writes should force the servers to update + // their clocks to this value and increment the logical component of the timestamp. + insertRow(session, "0"); + assertTrue(client.hasLastPropagatedTimestamp()); + long[] clockValues = HTTimestampToPhysicalAndLogical(client.getLastPropagatedTimestamp()); + assertEquals(clockValues[1], 0); + long futureTs = clockValues[0] + 5000000; + client.updateLastPropagatedTimestamp(clockTimestampToHTTimestamp(futureTs, + TimeUnit.MICROSECONDS)); + + String[] keys = new String[] {"1", "2", "3", "11", "22", "33"}; + for (int i = 0; i < keys.length; i++) { + if (i == keys.length / 2) { + // Switch flush mode to test timestamp propagation with MANUAL_FLUSH. + session.setFlushMode(AsyncKuduSession.FlushMode.MANUAL_FLUSH); + } + insertRow(session, keys[i]); + assertTrue(client.hasLastPropagatedTimestamp()); + clockValues = HTTimestampToPhysicalAndLogical(client.getLastPropagatedTimestamp()); + LOG.debug("Clock value after write[%d]: %s Logical value: %d", + i, new Date(clockValues[0] / 1000).toString(), clockValues[1]); + assertEquals(clockValues[0], futureTs); + logicalValues.add(clockValues[1]); + assertTrue(Ordering.natural().isOrdered(logicalValues)); + } + + // Scan all rows with READ_LATEST (the default), which should retrieve all rows. 
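    // 1 + keys.length accounts for the initial row "0" plus the rows inserted for each key above.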
+ assertEquals(1 + keys.length, countRowsInScan(client.newScannerBuilder(table).build())); + + // Now scan at multiple snapshots with READ_AT_SNAPSHOT. The logical timestamp from the 'i'th + // row (counted from 0) combined with the latest physical timestamp should observe 'i + 1' rows. + for (int i = 0; i < logicalValues.size(); i++) { + long logicalValue = logicalValues.get(i); + long snapshotTime = physicalAndLogicalToHTTimestamp(futureTs, logicalValues.get(i)); + int expected = i + 1; + assertEquals( + String.format("wrong number of rows for write %d at logical timestamp %d", + i, logicalValue), + expected, scanAtSnapshot(snapshotTime)); + } + + // The last snapshots needs to be one into the future w.r.t. the last write's timestamp + // to get all rows, but the snapshot timestamp can't be bigger than the propagated + // timestamp. Ergo increase the propagated timestamp first. + long latestLogicalValue = logicalValues.get(logicalValues.size() - 1); + client.updateLastPropagatedTimestamp(client.getLastPropagatedTimestamp() + 1); + long snapshotTime = physicalAndLogicalToHTTimestamp(futureTs, latestLogicalValue + 1); + assertEquals(1 + keys.length, scanAtSnapshot(snapshotTime)); + } + + private int scanAtSnapshot(long time) throws Exception { + AsyncKuduScanner.AsyncKuduScannerBuilder builder = + harness.getAsyncClient().newScannerBuilder(table) + .snapshotTimestampRaw(time) + .readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT); + return countRowsInScan(builder.build()); + } + + private void insertRow(KuduSession session, String key) throws KuduException { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString(0, key); + session.apply(insert); + session.flush(); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKeyEncoding.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKeyEncoding.java new file mode 100644 index 0000000000..b200df1525 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKeyEncoding.java @@ -0,0 +1,510 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.math.BigDecimal; +import java.sql.Date; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.collect.ImmutableList; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder; +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.PartitionSchema.HashBucketSchema; +import org.apache.kudu.client.PartitionSchema.RangeSchema; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.util.CharUtil; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.DecimalUtil; + +public class TestKeyEncoding { + + private KuduClient client; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + } + + private static Schema buildSchema(ColumnSchemaBuilder... columns) { + int i = 0; + Common.SchemaPB.Builder pb = Common.SchemaPB.newBuilder(); + for (ColumnSchemaBuilder column : columns) { + Common.ColumnSchemaPB columnPb = + ProtobufHelper.columnToPb(Common.ColumnSchemaPB.newBuilder(), + i++, + column.build()); + pb.addColumns(columnPb); + } + return ProtobufHelper.pbToSchema(pb.build()); + } + + private static void assertBytesEquals(byte[] actual, byte[] expected) { + assertTrue(String.format("expected: '%s', got: '%s'", + Bytes.pretty(expected), + Bytes.pretty(actual)), + Bytes.equals(expected, actual)); + } + + private static void assertBytesEquals(byte[] actual, String expected) { + assertBytesEquals(actual, expected.getBytes(UTF_8)); + } + + /** + * Builds the default partition schema for a schema. + * @param schema the schema + * @return a default partition schema + */ + private PartitionSchema defaultPartitionSchema(Schema schema) { + List columnIds = new ArrayList<>(); + for (int i = 0; i < schema.getPrimaryKeyColumnCount(); i++) { + // Schema does not provide a way to lookup a column ID by column index, + // so instead we assume that the IDs for the primary key columns match + // their respective index, which holds up when the schema is created + // with buildSchema. + columnIds.add(i); + } + return new PartitionSchema( + new PartitionSchema.RangeSchema(columnIds), ImmutableList.of(), schema); + } + + /** + * Builds the default CreateTableOptions for a schema. 
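The returned options range-partition the table on every primary key column and add no hash partitioning.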
+ * + * @param schema the schema + * @return a default CreateTableOptions + */ + private CreateTableOptions defaultCreateTableOptions(Schema schema) { + List columnNames = new ArrayList<>(); + for (ColumnSchema columnSchema : schema.getPrimaryKeyColumns()) { + columnNames.add(columnSchema.getName()); + } + return new CreateTableOptions() + .setRangePartitionColumns(columnNames); + } + + @Test + public void testPrimaryKeys() { + Schema schemaOneString = + buildSchema(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING).key(true)); + KuduTable table = new KuduTable(null, "one", "one", schemaOneString, + defaultPartitionSchema(schemaOneString), 3, null, null, null); + Insert oneKeyInsert = new Insert(table); + PartialRow row = oneKeyInsert.getRow(); + row.addString("key", "foo"); + assertBytesEquals(row.encodePrimaryKey(), "foo"); + + Schema schemaTwoString = buildSchema( + new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING).key(true), + new ColumnSchema.ColumnSchemaBuilder("key2", Type.STRING).key(true)); + KuduTable table2 = new KuduTable(null, "two", "two", schemaTwoString, + defaultPartitionSchema(schemaTwoString), 3, null, null, null); + Insert twoKeyInsert = new Insert(table2); + row = twoKeyInsert.getRow(); + row.addString("key", "foo"); + row.addString("key2", "bar"); + assertBytesEquals(row.encodePrimaryKey(), "foo\0\0bar"); + + Insert twoKeyInsertWithNull = new Insert(table2); + row = twoKeyInsertWithNull.getRow(); + row.addString("key", "xxx\0yyy"); + row.addString("key2", "bar"); + assertBytesEquals(row.encodePrimaryKey(), "xxx\0\1yyy\0\0bar"); + + // test that we get the correct memcmp result, the bytes are in big-endian order in a key + Schema schemaIntString = buildSchema( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true), + new ColumnSchema.ColumnSchemaBuilder("key2", Type.STRING).key(true)); + PartitionSchema partitionSchemaIntString = defaultPartitionSchema(schemaIntString); + KuduTable table3 = new KuduTable(null, "three", "three", + schemaIntString, partitionSchemaIntString, 3, null, null, null); + Insert small = new Insert(table3); + row = small.getRow(); + row.addInt("key", 20); + row.addString("key2", "data"); + byte[] smallPK = small.getRow().encodePrimaryKey(); + assertEquals(0, Bytes.memcmp(smallPK, smallPK)); + + Insert big = new Insert(table3); + row = big.getRow(); + row.addInt("key", 10000); + row.addString("key2", "data"); + byte[] bigPK = big.getRow().encodePrimaryKey(); + assertTrue(Bytes.memcmp(smallPK, bigPK) < 0); + assertTrue(Bytes.memcmp(bigPK, smallPK) > 0); + + // The following tests test our assumptions on unsigned data types sorting from KeyEncoder + byte four = 4; + byte onHundredTwentyFour = -4; + four = Bytes.xorLeftMostBit(four); + onHundredTwentyFour = Bytes.xorLeftMostBit(onHundredTwentyFour); + assertTrue(four < onHundredTwentyFour); + + byte[] threeHundred = Bytes.fromInt(300); + byte[] reallyBigNumber = Bytes.fromInt(-300); + threeHundred[0] = Bytes.xorLeftMostBit(threeHundred[0]); + reallyBigNumber[3] = Bytes.xorLeftMostBit(reallyBigNumber[3]); + assertTrue(Bytes.memcmp(threeHundred, reallyBigNumber) < 0); + } + + @Test + public void testPrimaryKeyEncoding() { + Schema schema = buildSchema( + new ColumnSchemaBuilder("int8", Type.INT8).key(true), + new ColumnSchemaBuilder("int16", Type.INT16).key(true), + new ColumnSchemaBuilder("int32", Type.INT32).key(true), + new ColumnSchemaBuilder("int64", Type.INT64).key(true), + new ColumnSchemaBuilder("decimal32", Type.DECIMAL).key(true) + 
.typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL32_PRECISION, 0)), + new ColumnSchemaBuilder("decimal64", Type.DECIMAL).key(true) + .typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL64_PRECISION, 0)), + new ColumnSchemaBuilder("decimal128", Type.DECIMAL).key(true) + .typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL128_PRECISION, 0)), + new ColumnSchemaBuilder("varchar", Type.VARCHAR).key(true) + .typeAttributes(CharUtil.typeAttributes(10)), + new ColumnSchemaBuilder("string", Type.STRING).key(true), + new ColumnSchemaBuilder("binary", Type.BINARY).key(true)); + + PartialRow rowA = schema.newPartialRow(); + rowA.addByte("int8", Byte.MIN_VALUE); + rowA.addShort("int16", Short.MIN_VALUE); + rowA.addInt("int32", Integer.MIN_VALUE); + rowA.addLong("int64", Long.MIN_VALUE); + // Note: The decimal value is not the minimum of the underlying int32, int64, int128 type so + // we don't use "minimum" values in the test. + rowA.addDecimal("decimal32", BigDecimal.valueOf(5)); + rowA.addDecimal("decimal64", BigDecimal.valueOf(6)); + rowA.addDecimal("decimal128", BigDecimal.valueOf(7)); + rowA.addVarchar("varchar", ""); + rowA.addString("string", ""); + rowA.addBinary("binary", "".getBytes(UTF_8)); + + byte[] rowAEncoded = rowA.encodePrimaryKey(); + assertBytesEquals(rowAEncoded, + new byte[] { + 0, + 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + (byte) 0x80, 0, 0, 5, + (byte) 0x80, 0, 0, 0, 0, 0, 0, 6, + (byte) 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 0, 0, + 0, 0 + }); + assertEquals(rowA.stringifyRowKey(), + KeyEncoder.decodePrimaryKey(schema, rowAEncoded).stringifyRowKey()); + + PartialRow rowB = schema.newPartialRow(); + rowB.addByte("int8", Byte.MAX_VALUE); + rowB.addShort("int16", Short.MAX_VALUE); + rowB.addInt("int32", Integer.MAX_VALUE); + rowB.addLong("int64", Long.MAX_VALUE); + // Note: The decimal value is not the maximum of the underlying int32, int64, int128 type so + // we don't use "minimum" values in the test. 
+ rowB.addDecimal("decimal32", BigDecimal.valueOf(5)); + rowB.addDecimal("decimal64", BigDecimal.valueOf(6)); + rowB.addDecimal("decimal128", BigDecimal.valueOf(7)); + rowB.addVarchar("varchar", "abc\1\0defghij"); + rowB.addString("string", "abc\1\0def"); + rowB.addBinary("binary", "\0\1binary".getBytes(UTF_8)); + + byte[] rowBEncoded = rowB.encodePrimaryKey(); + assertBytesEquals(rowB.encodePrimaryKey(), + new byte[] { + -1, + -1, -1, + -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + (byte) 0x80, 0, 0, 5, + (byte) 0x80, 0, 0, 0, 0, 0, 0, 6, + (byte) 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 'a', 'b', 'c', 1, 0, 1, 'd', 'e', 'f', 'g', 'h', 0, 0, + 'a', 'b', 'c', 1, 0, 1, 'd', 'e', 'f', 0, 0, + 0, 1, 'b', 'i', 'n', 'a', 'r', 'y', + }); + assertEquals(rowB.stringifyRowKey(), + KeyEncoder.decodePrimaryKey(schema, rowBEncoded).stringifyRowKey()); + + PartialRow rowC = schema.newPartialRow(); + rowC.addByte("int8", (byte) 1); + rowC.addShort("int16", (short) 2); + rowC.addInt("int32", 3); + rowC.addLong("int64", 4); + rowC.addDecimal("decimal32", BigDecimal.valueOf(5)); + rowC.addDecimal("decimal64", BigDecimal.valueOf(6)); + rowC.addDecimal("decimal128", BigDecimal.valueOf(7)); + rowC.addVarchar("varchar", "abc\n12345678"); + rowC.addString("string", "abc\n123"); + rowC.addBinary("binary", "\0\1\2\3\4\5".getBytes(UTF_8)); + + byte[] rowCEncoded = rowC.encodePrimaryKey(); + assertBytesEquals(rowCEncoded, + new byte[] { + (byte) 0x81, + (byte) 0x80, 2, + (byte) 0x80, 0, 0, 3, + (byte) 0x80, 0, 0, 0, 0, 0, 0, 4, + (byte) 0x80, 0, 0, 5, + (byte) 0x80, 0, 0, 0, 0, 0, 0, 6, + (byte) 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 'a', 'b', 'c', '\n', '1', '2', '3', '4', '5', '6', 0, 0, + 'a', 'b', 'c', '\n', '1', '2', '3', 0, 0, + 0, 1, 2, 3, 4, 5, + }); + assertEquals(rowC.stringifyRowKey(), + KeyEncoder.decodePrimaryKey(schema, rowCEncoded).stringifyRowKey()); + + PartialRow rowD = schema.newPartialRow(); + rowD.addByte("int8", (byte) -1); + rowD.addShort("int16", (short) -2); + rowD.addInt("int32", -3); + rowD.addLong("int64", -4); + rowD.addDecimal("decimal32", BigDecimal.valueOf(-5)); + rowD.addDecimal("decimal64", BigDecimal.valueOf(-6)); + rowD.addDecimal("decimal128", BigDecimal.valueOf(-7)); + rowD.addVarchar("varchar", "\0abc\n\1\1\0 123\1\0"); + rowD.addString("string", "\0abc\n\1\1\0 123\1\0"); + rowD.addBinary("binary", "\0\1\2\3\4\5\0".getBytes(UTF_8)); + + byte[] rowDEncoded = rowD.encodePrimaryKey(); + assertBytesEquals(rowDEncoded, + new byte[] { + (byte) 127, + (byte) 127, -2, + (byte) 127, -1, -1, -3, + (byte) 127, -1, -1, -1, -1, -1, -1, -4, + (byte) 127, -1, -1, -5, + (byte) 127, -1, -1, -1, -1, -1, -1, -6, + (byte) 127, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -7, + 0, 1, 'a', 'b', 'c', '\n', 1, 1, 0, 1, ' ', '1', 0, 0, + 0, 1, 'a', 'b', 'c', '\n', 1, 1, 0, 1, ' ', '1', '2', '3', 1, 0, 1, 0, 0, + 0, 1, 2, 3, 4, 5, 0, + }); + assertEquals(rowD.stringifyRowKey(), + KeyEncoder.decodePrimaryKey(schema, rowDEncoded).stringifyRowKey()); + } + + @Test + public void testPartitionKeyEncoding() { + Schema schema = buildSchema( + new ColumnSchemaBuilder("a", Type.INT32).key(true), + new ColumnSchemaBuilder("b", Type.STRING).key(true), + new ColumnSchemaBuilder("c", Type.STRING).key(true)); + + final PartitionSchema partitionSchema = + new PartitionSchema(new RangeSchema(ImmutableList.of(0, 1, 2)), + ImmutableList.of( + new HashBucketSchema(ImmutableList.of(0, 1), 32, 0), + new HashBucketSchema(ImmutableList.of(2), 32, 42)), + schema); + + PartialRow rowA 
= schema.newPartialRow(); + rowA.addInt("a", 0); + rowA.addString("b", ""); + rowA.addString("c", ""); + assertBytesEquals(KeyEncoder.encodePartitionKey(rowA, partitionSchema), + new byte[]{ + 0, 0, 0, 0, // hash(0, "") + 0, 0, 0, 0x14, // hash("") + (byte) 0x80, 0, 0, 0, // a = 0 + 0, 0, // b = ""; c is elided + }); + + PartialRow rowB = schema.newPartialRow(); + rowB.addInt("a", 1); + rowB.addString("b", ""); + rowB.addString("c", ""); + assertBytesEquals(KeyEncoder.encodePartitionKey(rowB, partitionSchema), + new byte[]{ + 0, 0, 0, 0x5, // hash(1, "") + 0, 0, 0, 0x14, // hash("") + (byte) 0x80, 0, 0, 1, // a = 0 + 0, 0, // b = ""; c is elided + }); + + PartialRow rowC = schema.newPartialRow(); + rowC.addInt("a", 0); + rowC.addString("b", "b"); + rowC.addString("c", "c"); + assertBytesEquals(KeyEncoder.encodePartitionKey(rowC, partitionSchema), + new byte[]{ + 0, 0, 0, 0x1A, // hash(0, "b") + 0, 0, 0, 0x1D, // hash("c") + (byte) 0x80, 0, 0, 0, // a = 1 + 'b', 0, 0, // b = "b" + 'c' // b = "c" + }); + + PartialRow rowD = schema.newPartialRow(); + rowD.addInt("a", 1); + rowD.addString("b", "b"); + rowD.addString("c", "c"); + assertBytesEquals(KeyEncoder.encodePartitionKey(rowD, partitionSchema), + new byte[]{ + 0, 0, 0, 0, // hash(1, "b") + 0, 0, 0, 0x1D, // hash("c") + (byte) 0x80, 0, 0, 1, // a = 0 + 'b', 0, 0, // b = "b" + 'c' // b = "c" + }); + } + + @Test + public void testPartitionKeyEncodingCustomHashSchema() { + Schema schema = buildSchema( + new ColumnSchemaBuilder("a", Type.INT32).key(true), + new ColumnSchemaBuilder("b", Type.STRING).key(true), + new ColumnSchemaBuilder("c", Type.STRING).key(true)); + + PartialRow lower = schema.newPartialRow(); + lower.addInt("a", 0); + lower.addString("b", "B"); + lower.addString("c", "C"); + + PartialRow upper = schema.newPartialRow(); + upper.addInt("a", 10); + upper.addString("b", "b"); + upper.addString("c", "c"); + + final PartitionSchema partitionSchema = + new PartitionSchema( + new RangeSchema(ImmutableList.of(0, 1, 2)), + ImmutableList.of( + new HashBucketSchema(ImmutableList.of(2), 3, 0)), + ImmutableList.of( + new PartitionSchema.RangeWithHashSchema( + lower, + upper, + ImmutableList.of(new HashBucketSchema(ImmutableList.of(0, 1), 32, 0)))), + schema); + + // That's the row in the range having its own custom hash schema. + PartialRow rowA = schema.newPartialRow(); + rowA.addInt("a", 1); + rowA.addString("b", "C"); + rowA.addString("c", "D"); + assertBytesEquals(KeyEncoder.encodePartitionKey(rowA, partitionSchema), + new byte[]{ + 0, 0, 0, 0x10, // hash(1, "") + (byte) 0x80, 0, 0, 1, // a = 1 + 'C', 0, 0, // b = "C" + 'D' // c = "D" + }); + + // That's the row encoded with the table-wide hash schema. 
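    // With a = 11 this row falls outside the custom-hash range [(0, "B", "C"), (10, "b", "c")),
    // so only the table-wide hash on column "c" applies to its partition key.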
+ PartialRow rowB = schema.newPartialRow(); + rowB.addInt("a", 11); + rowB.addString("b", ""); + rowB.addString("c", "d"); + assertBytesEquals(KeyEncoder.encodePartitionKey(rowB, partitionSchema), + new byte[]{ + 0, 0, 0, 0x2, // hash("d") + (byte) 0x80, 0, 0, 11,// a = 11 + 0, 0, // b = "" + 'd' // c = "d" + }); + } + + @Test(timeout = 100000) + public void testAllPrimaryKeyTypes() throws Exception { + Schema schema = buildSchema( + new ColumnSchemaBuilder("int8", Type.INT8).key(true), + new ColumnSchemaBuilder("int16", Type.INT16).key(true), + new ColumnSchemaBuilder("int32", Type.INT32).key(true), + new ColumnSchemaBuilder("int64", Type.INT64).key(true), + new ColumnSchemaBuilder("string", Type.STRING).key(true), + new ColumnSchemaBuilder("binary", Type.BINARY).key(true), + new ColumnSchemaBuilder("timestamp", Type.UNIXTIME_MICROS).key(true), + new ColumnSchemaBuilder("decimal32", Type.DECIMAL).key(true) + .typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL32_PRECISION, 0)), + new ColumnSchemaBuilder("decimal64", Type.DECIMAL).key(true) + .typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL64_PRECISION, 0)), + new ColumnSchemaBuilder("decimal128", Type.DECIMAL).key(true) + .typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL128_PRECISION, 0)), + new ColumnSchemaBuilder("varchar", Type.VARCHAR).key(true) + .typeAttributes(CharUtil.typeAttributes(10)), + new ColumnSchemaBuilder("date", Type.DATE).key(true), + new ColumnSchemaBuilder("bool", Type.BOOL), // not primary key type + new ColumnSchemaBuilder("float", Type.FLOAT), // not primary key type + new ColumnSchemaBuilder("double", Type.DOUBLE)); // not primary key type + + KuduTable table = client.createTable("testAllPrimaryKeyTypes-" + System.currentTimeMillis(), + schema, defaultCreateTableOptions(schema)); + final KuduSession session = client.newSession(); + + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addByte(0, (byte) 1); + row.addShort(1, (short) 2); + row.addInt(2, 3); + row.addLong(3, 4L); + row.addString(4, "foo"); + row.addBinary(5, "bar".getBytes(UTF_8)); + row.addLong(6, 6L); + row.addDecimal(7, BigDecimal.valueOf(DecimalUtil.MAX_UNSCALED_DECIMAL32)); + row.addDecimal(8, BigDecimal.valueOf(DecimalUtil.MAX_UNSCALED_DECIMAL64)); + row.addDecimal(9, new BigDecimal(DecimalUtil.MAX_UNSCALED_DECIMAL128)); + row.addVarchar(10, "varchar bar"); + row.addDate(11, DateUtil.epochDaysToSqlDate(0)); + row.addBoolean(12, true); + row.addFloat(13, 7.8f); + row.addDouble(14, 9.9); + session.apply(insert); + session.close(); + + KuduScanner scanner = client.newScannerBuilder(table).build(); + while (scanner.hasMoreRows()) { + RowResultIterator it = scanner.nextRows(); + assertTrue(it.hasNext()); + RowResult rr = it.next(); + assertEquals((byte) 0x01, rr.getByte(0)); + assertEquals((short) 2, rr.getShort(1)); + assertEquals(3, rr.getInt(2)); + assertEquals(4L, rr.getLong(3)); + assertBytesEquals(rr.getBinaryCopy(4), "foo"); + assertBytesEquals(rr.getBinaryCopy(5), "bar"); + assertEquals(6L, rr.getLong(6)); + assertTrue(BigDecimal.valueOf(DecimalUtil.MAX_UNSCALED_DECIMAL32) + .compareTo(rr.getDecimal(7)) == 0); + assertTrue(BigDecimal.valueOf(DecimalUtil.MAX_UNSCALED_DECIMAL64) + .compareTo(rr.getDecimal(8)) == 0); + assertTrue(new BigDecimal(DecimalUtil.MAX_UNSCALED_DECIMAL128) + .compareTo(rr.getDecimal(9)) == 0); + assertEquals("varchar ba", rr.getVarchar(10)); + assertEquals(0, rr.getInt(11)); + assertTrue(rr.getBoolean(12)); + assertEquals(7.8f, rr.getFloat(13), 
.001f); + assertEquals(9.9, rr.getDouble(14), .001); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java new file mode 100644 index 0000000000..8f7073b13e --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java @@ -0,0 +1,2096 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.GREATER; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.GREATER_EQUAL; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.LESS; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.LESS_EQUAL; +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.createManyStringsSchema; +import static org.apache.kudu.test.ClientTestUtil.createManyVarcharsSchema; +import static org.apache.kudu.test.ClientTestUtil.createSchemaWithBinaryColumns; +import static org.apache.kudu.test.ClientTestUtil.createSchemaWithDateColumns; +import static org.apache.kudu.test.ClientTestUtil.createSchemaWithDecimalColumns; +import static org.apache.kudu.test.ClientTestUtil.createSchemaWithNonUniqueKey; +import static org.apache.kudu.test.ClientTestUtil.createSchemaWithTimestampColumns; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicTableOptionsWithNonCoveredRange; +import static org.apache.kudu.test.ClientTestUtil.scanTableToStrings; +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.junit.Assume.assumeTrue; + +import java.io.Closeable; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.math.BigDecimal; +import java.security.cert.CertificateException; +import java.sql.Date; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; 
+import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; +import com.stumbleupon.async.Deferred; +import io.netty.util.Timeout; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.CapturingLogAppender; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.KuduTestHarness.LocationConfig; +import org.apache.kudu.test.KuduTestHarness.MasterServerConfig; +import org.apache.kudu.test.KuduTestHarness.TabletServerConfig; +import org.apache.kudu.test.RandomUtils; +import org.apache.kudu.test.cluster.KuduBinaryInfo; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.DecimalUtil; +import org.apache.kudu.util.TimestampUtil; + +public class TestKuduClient { + private static final Logger LOG = LoggerFactory.getLogger(TestKuduClient.class); + + private static final String TABLE_NAME = "TestKuduClient"; + + private static final int SHORT_SCANNER_TTL_MS = 5000; + private static final int SHORT_SCANNER_GC_US = SHORT_SCANNER_TTL_MS * 100; // 10% of the TTL. + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + /** + * Test setting and reading the most recent propagated timestamp. + */ + @Test(timeout = 100000) + public void testLastPropagatedTimestamps() throws Exception { + // Scan a table to ensure a timestamp is propagated. + KuduTable table = client.createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); + client.newScannerBuilder(table).build().nextRows().getNumRows(); + assertTrue(client.hasLastPropagatedTimestamp()); + assertTrue(client.hasLastPropagatedTimestamp()); + assertTrue(asyncClient.hasLastPropagatedTimestamp()); + + long initialTs = client.getLastPropagatedTimestamp(); + + // Check that the initial timestamp is consistent with the asynchronous client. + assertEquals(initialTs, client.getLastPropagatedTimestamp()); + assertEquals(initialTs, asyncClient.getLastPropagatedTimestamp()); + + // Attempt to change the timestamp to a lower value. This should not change + // the internal timestamp, as it must be monotonically increasing. + client.updateLastPropagatedTimestamp(initialTs - 1); + assertEquals(initialTs, client.getLastPropagatedTimestamp()); + assertEquals(initialTs, asyncClient.getLastPropagatedTimestamp()); + + // Use the synchronous client to update the last propagated timestamp and + // check with both clients that the timestamp was updated. + client.updateLastPropagatedTimestamp(initialTs + 1); + assertEquals(initialTs + 1, client.getLastPropagatedTimestamp()); + assertEquals(initialTs + 1, asyncClient.getLastPropagatedTimestamp()); + } + + /** + * Test creating and deleting a table through a KuduClient. 
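+ * The table is then re-created with an additional column to verify that the
+ * new schema is visible when the table is re-opened.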
+ */ + @Test(timeout = 100000) + public void testCreateDeleteTable() throws Exception { + // Check that we can create a table. + client.createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); + assertFalse(client.getTablesList().getTablesList().isEmpty()); + assertTrue(client.getTablesList().getTablesList().contains(TABLE_NAME)); + + // Check that we can delete it. + client.deleteTable(TABLE_NAME); + assertFalse(client.getTablesList().getTablesList().contains(TABLE_NAME)); + + // Check that we can re-recreate it, with a different schema. + List columns = new ArrayList<>(basicSchema.getColumns()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("one more", Type.STRING).build()); + Schema newSchema = new Schema(columns); + client.createTable(TABLE_NAME, newSchema, getBasicCreateTableOptions()); + + // Check that we can open a table and see that it has the new schema. + KuduTable table = client.openTable(TABLE_NAME); + assertEquals(newSchema.getColumnCount(), table.getSchema().getColumnCount()); + assertTrue(table.getPartitionSchema().isSimpleRangePartitioning()); + + // Check that the block size parameter we specified in the schema is respected. + assertEquals(4096, newSchema.getColumn("column3_s").getDesiredBlockSize()); + assertEquals(ColumnSchema.Encoding.DICT_ENCODING, + newSchema.getColumn("column3_s").getEncoding()); + assertEquals(ColumnSchema.CompressionAlgorithm.LZ4, + newSchema.getColumn("column3_s").getCompressionAlgorithm()); + } + + /** + * Test recalling a soft deleted table through a KuduClient. + */ + @Test(timeout = 100000) + public void testRecallDeletedTable() throws Exception { + // Check that we can create a table. + assertTrue(client.getTablesList().getTablesList().isEmpty()); + final KuduTable table = client.createTable(TABLE_NAME, basicSchema, + getBasicCreateTableOptions()); + final String tableId = table.getTableId(); + assertEquals(1, client.getTablesList().getTablesList().size()); + assertEquals(TABLE_NAME, client.getTablesList().getTablesList().get(0)); + + // Check that we can delete it. + client.deleteTable(TABLE_NAME, 600); + List tables = client.getTablesList().getTablesList(); + assertEquals(0, tables.size()); + tables = client.getSoftDeletedTablesList().getTablesList(); + assertEquals(1, tables.size()); + String softDeletedTable = tables.get(0); + assertEquals(TABLE_NAME, softDeletedTable); + // Check that we can recall the soft_deleted table. + client.recallDeletedTable(tableId); + assertEquals(1, client.getTablesList().getTablesList().size()); + assertEquals(TABLE_NAME, client.getTablesList().getTablesList().get(0)); + + // Check that we can delete it. + client.deleteTable(TABLE_NAME, 600); + tables = client.getTablesList().getTablesList(); + assertEquals(0, tables.size()); + tables = client.getSoftDeletedTablesList().getTablesList(); + assertEquals(1, tables.size()); + softDeletedTable = tables.get(0); + assertEquals(TABLE_NAME, softDeletedTable); + // Check we can recall soft deleted table with new table name. + final String newTableName = "NewTable"; + client.recallDeletedTable(tableId, newTableName); + assertEquals(1, client.getTablesList().getTablesList().size()); + assertEquals(newTableName, client.getTablesList().getTablesList().get(0)); + } + + /** + * Test creating a table with various invalid schema cases. 
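+ * Currently this covers exceeding the server-side limit on the number of columns.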
+ */ + @Test(timeout = 100000) + public void testCreateTableTooManyColumns() throws Exception { + List cols = new ArrayList<>(); + cols.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING) + .key(true) + .build()); + for (int i = 0; i < 1000; i++) { + // not null with default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c" + i, Type.STRING) + .build()); + } + Schema schema = new Schema(cols); + try { + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + fail(); + } catch (NonRecoverableException nre) { + assertThat(nre.toString(), containsString( + "number of columns 1001 is greater than the permitted maximum")); + } + } + + /** + * Test creating and deleting a table with extra-configs through a KuduClient. + */ + @Test(timeout = 100000) + public void testCreateDeleteTableWitExtraConfigs() throws Exception { + // Check that we can create a table. + Map extraConfigs = new HashMap<>(); + extraConfigs.put("kudu.table.history_max_age_sec", "7200"); + + client.createTable( + TABLE_NAME, + basicSchema, + getBasicCreateTableOptions().setExtraConfigs(extraConfigs)); + + KuduTable table = client.openTable(TABLE_NAME); + extraConfigs = table.getExtraConfig(); + assertTrue(extraConfigs.containsKey("kudu.table.history_max_age_sec")); + assertEquals("7200", extraConfigs.get("kudu.table.history_max_age_sec")); + } + + /* + * Test the scanner behavior when a scanner is used beyond + * the scanner ttl without calling keepAlive. + */ + @Test(timeout = 100000) + @TabletServerConfig(flags = { + "--scanner_ttl_ms=" + SHORT_SCANNER_TTL_MS, + "--scanner_gc_check_interval_us=" + SHORT_SCANNER_GC_US, + }) + public void testScannerExpiration() throws Exception { + // Create a basic table and load it with data. + int numRows = 1000; + client.createTable( + TABLE_NAME, + basicSchema, + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + + for (int i = 0; i < numRows; i++) { + Insert insert = createBasicSchemaInsert(table, i); + session.apply(insert); + } + + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .replicaSelection(ReplicaSelection.CLOSEST_REPLICA) + .batchSizeBytes(100) // Use a small batch size so we can call nextRows many times. + .build(); + + // Initialize the scanner and verify we can read rows. + int rows = scanner.nextRows().getNumRows(); + assertTrue("Scanner did not read any rows", rows > 0); + + // Wait for the scanner to time out. + Thread.sleep(SHORT_SCANNER_TTL_MS * 2); + + try { + scanner.nextRows(); + fail("Exception was not thrown when accessing an expired scanner"); + } catch (NonRecoverableException ex) { + assertTrue("Expected Scanner not found error, got:\n" + ex.toString(), + ex.getMessage().matches(".*Scanner .* not found.*")); + } + + // Closing an expired scanner shouldn't throw an exception. + scanner.close(); + } + + /* + * Test keeping a scanner alive beyond scanner ttl. + */ + @Test(timeout = 100000) + @TabletServerConfig(flags = { + "--scanner_ttl_ms=" + SHORT_SCANNER_TTL_MS, + "--scanner_gc_check_interval_us=" + SHORT_SCANNER_GC_US, + }) + public void testKeepAlive() throws Exception { + // Create a basic table and load it with data. 
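+ // The table is hash-partitioned into two tablets so that the scan pauses between
+ // tablets, which lets the test call keepAlive() at several points across the scanner TTL.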
+ int numRows = 1000; + client.createTable( + TABLE_NAME, + basicSchema, + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + + for (int i = 0; i < numRows; i++) { + Insert insert = createBasicSchemaInsert(table, i); + session.apply(insert); + } + + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .replicaSelection(ReplicaSelection.CLOSEST_REPLICA) + .batchSizeBytes(100) // Use a small batch size so we can call nextRows many times. + .build(); + + // KeepAlive on uninitialized scanner should be ok. + scanner.keepAlive(); + // Get the first batch and initialize the scanner + int accum = scanner.nextRows().getNumRows(); + + while (scanner.hasMoreRows()) { + int rows = scanner.nextRows().getNumRows(); + accum += rows; + // Break when we are between tablets. + if (scanner.currentTablet() == null) { + LOG.info(String.format("Between tablets after scanning %d rows", accum)); + break; + } + // Ensure we actually end up between tablets. + if (accum == numRows) { + fail("All rows were in a single tablet."); + } + } + + // In between scanners now and should be ok. + scanner.keepAlive(); + + // Initialize the next scanner or keepAlive will have no effect. + accum += scanner.nextRows().getNumRows(); + + // Wait for longer than the scanner ttl calling keepAlive throughout. + // Each loop sleeps 25% of the scanner ttl and we loop 10 times to ensure + // we extend over 2x the scanner ttl. + Random random = RandomUtils.getRandom(); + for (int i = 0; i < 10; i++) { + Thread.sleep(SHORT_SCANNER_TTL_MS / 4); + // Force 1/3 of the keepAlive requests to retry up to 3 times. + if (i % 3 == 0) { + RpcProxy.failNextRpcs(random.nextInt(4), + new RecoverableException(Status.ServiceUnavailable("testKeepAlive"))); + } + scanner.keepAlive(); + } + + // Finish out the rows. + while (scanner.hasMoreRows()) { + accum += scanner.nextRows().getNumRows(); + } + assertEquals("All rows were not scanned", numRows, accum); + + // At this point the scanner is closed and there is nothing to keep alive. + try { + scanner.keepAlive(); + fail("Exception was not thrown when calling keepAlive on a closed scanner"); + } catch (IllegalStateException ex) { + assertThat(ex.getMessage(), containsString("Scanner has already been closed")); + } + } + + /* + * Test keeping a scanner alive periodically beyond scanner ttl. + */ + @Test(timeout = 100000) + @TabletServerConfig(flags = { + "--scanner_ttl_ms=" + SHORT_SCANNER_TTL_MS / 5, + "--scanner_gc_check_interval_us=" + SHORT_SCANNER_GC_US, + }) + public void testKeepAlivePeriodically() throws Exception { + // Create a basic table and load it with data. + int numRows = 1000; + client.createTable( + TABLE_NAME, + basicSchema, + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 3)); + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + + for (int i = 0; i < numRows; i++) { + Insert insert = createBasicSchemaInsert(table, i); + session.apply(insert); + } + + // Start keep-alive timer and read all data out. After read out all data, + // the keep-alive timer will be cancelled. 
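+ // The cancellation is checked below via reflection on the scanner's private
+ // keepAliveTimeout field.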
+ { + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .replicaSelection(ReplicaSelection.CLOSEST_REPLICA) + .batchSizeBytes(100) + .build(); + + scanner.startKeepAlivePeriodically(SHORT_SCANNER_TTL_MS / 10); + int rowCount = 0; + while (scanner.hasMoreRows()) { + // Sleep a long time to make scanner easy to be expired. + Thread.sleep(SHORT_SCANNER_TTL_MS / 2); + rowCount += scanner.nextRows().getNumRows(); + } + assertEquals(numRows, rowCount); + // Check that keepAliveTimeout is cancelled. + Field fieldAsyncScanner = KuduScanner.class.getDeclaredField("asyncScanner"); + fieldAsyncScanner.setAccessible(true); + AsyncKuduScanner asyncScanner = (AsyncKuduScanner)fieldAsyncScanner.get(scanner); + Field fieldKeepaliveTimeout = + AsyncKuduScanner.class.getDeclaredField("keepAliveTimeout"); + fieldKeepaliveTimeout.setAccessible(true); + Timeout keepAliveTimeout = (Timeout)fieldKeepaliveTimeout.get(asyncScanner); + assertTrue(keepAliveTimeout.isCancelled()); + } + + // Start keep-alive timer then close it. After closing the client, + // the keep-alive timer will be closed. + { + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .replicaSelection(ReplicaSelection.CLOSEST_REPLICA) + .batchSizeBytes(100) + .build(); + + scanner.startKeepAlivePeriodically(SHORT_SCANNER_TTL_MS / 10); + + // Check that keepAliveTimeout is not cancelled. + Field fieldAsyncScanner = KuduScanner.class.getDeclaredField("asyncScanner"); + fieldAsyncScanner.setAccessible(true); + AsyncKuduScanner asyncScanner = (AsyncKuduScanner)fieldAsyncScanner.get(scanner); + Field fieldKeepaliveTimeout = + AsyncKuduScanner.class.getDeclaredField("keepAliveTimeout"); + fieldKeepaliveTimeout.setAccessible(true); + Timeout keepAliveTimeout = (Timeout)fieldKeepaliveTimeout.get(asyncScanner); + assertFalse(keepAliveTimeout.isCancelled()); + + // Check that keepAliveTimeout is cancelled. + scanner.close(); + assertTrue(keepAliveTimeout.isCancelled()); + } + } + + /* + * Test stoping the keep-alive timer. + */ + @Test(timeout = 100000) + @TabletServerConfig(flags = { + "--scanner_ttl_ms=" + SHORT_SCANNER_TTL_MS / 5, + "--scanner_gc_check_interval_us=" + SHORT_SCANNER_GC_US, + }) + public void testStopKeepAlivePeriodically() throws Exception { + // Create a basic table and load it with data. + int numRows = 1000; + client.createTable( + TABLE_NAME, + basicSchema, + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 3)); + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + + for (int i = 0; i < numRows; i++) { + Insert insert = createBasicSchemaInsert(table, i); + session.apply(insert); + } + + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .replicaSelection(ReplicaSelection.CLOSEST_REPLICA) + .batchSizeBytes(100) // Use a small batch size so we can call nextRows many times. + .build(); + // Start the keep-alive timer and then close it. Read data will timeout. + assertTrue(scanner.startKeepAlivePeriodically(SHORT_SCANNER_TTL_MS / 10)); + assertTrue(scanner.stopKeepAlivePeriodically()); + while (scanner.hasMoreRows()) { + try { + // Sleep a long time to make scanner easy to be expired. 
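+ // The tablet server TTL for this test is SHORT_SCANNER_TTL_MS / 5, so sleeping for
+ // half of SHORT_SCANNER_TTL_MS comfortably exceeds it now that the keep-alive
+ // timer has been stopped.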
+ Thread.sleep(SHORT_SCANNER_TTL_MS / 2); + scanner.nextRows(); + } catch (Exception e) { + assertTrue(e.toString().contains("not found (it may have expired)")); + break; + } + } + } + + /** + * Test creating a table with columns with different combinations of NOT NULL and + * default values, inserting rows, and checking the results are as expected. + * Regression test for KUDU-180. + */ + @Test(timeout = 100000) + public void testTableWithDefaults() throws Exception { + List cols = new ArrayList<>(); + cols.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING) + .key(true) + .build()); + // nullable with no default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.STRING) + .nullable(true) + .build()); + // nullable with default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.STRING) + .nullable(true) + .defaultValue("def") + .build()); + // not null with no default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c3", Type.STRING) + .nullable(false) + .build()); + // not null with default + cols.add(new ColumnSchema.ColumnSchemaBuilder("c4", Type.STRING) + .nullable(false) + .defaultValue("def") + .build()); + Schema schema = new Schema(cols); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + + // Insert various rows. '-' indicates leaving the row unset in the insert. + List rows = ImmutableList.of( + // Specify all columns + "r1,a,b,c,d", + // Specify all, set nullable ones to NULL. + "r2,NULL,NULL,c,d", + // Don't specify any columns except for the one that is NOT NULL + // with no default. + "r3,-,-,c,-", + // Two rows which should not succeed. + "fail_1,a,b,c,NULL", + "fail_2,a,b,NULL,d"); + for (String row : rows) { + try { + String[] fields = row.split(",", -1); + Insert insert = table.newInsert(); + for (int i = 0; i < fields.length; i++) { + if (fields[i].equals("-")) { // leave unset + continue; + } + if (fields[i].equals("NULL")) { + insert.getRow().setNull(i); + } else { + insert.getRow().addString(i, fields[i]); + } + } + session.apply(insert); + } catch (IllegalArgumentException e) { + // We expect two of the inserts to fail when we try to set NULL values for + // nullable columns. + assertTrue(e.getMessage(), + e.getMessage().matches("c[34] cannot be set to null")); + } + } + session.flush(); + + // Check that we got the results we expected. + List expectedStrings = ImmutableList.of( + "STRING key=r1, STRING c1=a, STRING c2=b, STRING c3=c, STRING c4=d", + "STRING key=r2, STRING c1=NULL, STRING c2=NULL, STRING c3=c, STRING c4=d", + "STRING key=r3, STRING c1=NULL, STRING c2=def, STRING c3=c, STRING c4=def"); + List rowStrings = scanTableToStrings(table); + Collections.sort(rowStrings); + assertArrayEquals(rowStrings.toArray(new String[0]), + expectedStrings.toArray(new String[0])); + } + + /** + * Test inserting and retrieving VARCHAR columns. 
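+ * All columns come from createManyVarcharsSchema() and are declared as VARCHAR(10);
+ * values are deliberately added out of column order to verify they still land in
+ * the right positions.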
+ */ + @Test(timeout = 100000) + public void testVarchars() throws Exception { + Schema schema = createManyVarcharsSchema(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + for (int i = 0; i < 100; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addVarchar("key", String.format("key_%02d", i)); + row.addVarchar("c2", "c2_" + i); + if (i % 2 == 1) { + row.addVarchar("c3", "c3_" + i); + } + row.addVarchar("c4", "c4_" + i); + // NOTE: we purposefully add the strings in a non-left-to-right + // order to verify that we still place them in the right position in + // the row. + row.addVarchar("c1", "c1_" + i); + session.apply(insert); + if (i % 50 == 0) { + session.flush(); + } + } + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(100, rowStrings.size()); + assertEquals( + "VARCHAR key(10)=key_03, VARCHAR c1(10)=c1_3, VARCHAR c2(10)=c2_3," + + " VARCHAR c3(10)=c3_3, VARCHAR c4(10)=c4_3", rowStrings.get(3)); + assertEquals( + "VARCHAR key(10)=key_04, VARCHAR c1(10)=c1_4, VARCHAR c2(10)=c2_4," + + " VARCHAR c3(10)=NULL, VARCHAR c4(10)=c4_4", rowStrings.get(4)); + + KuduScanner scanner = client.newScannerBuilder(table).build(); + + assertTrue("Scanner should have returned row", scanner.hasMoreRows()); + + RowResultIterator rows = scanner.nextRows(); + final RowResult next = rows.next(); + + // Do negative testing on string type. + try { + next.getInt("c2"); + fail("IllegalArgumentException was not thrown when accessing " + + "a VARCHAR column with getInt"); + } catch (IllegalArgumentException ignored) { + // ignored + } + } + + /** + * Test inserting and retrieving string columns. + */ + @Test(timeout = 100000) + public void testStrings() throws Exception { + Schema schema = createManyStringsSchema(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + for (int i = 0; i < 100; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", String.format("key_%02d", i)); + row.addString("c2", "c2_" + i); + if (i % 2 == 1) { + row.addString("c3", "c3_" + i); + } + row.addString("c4", "c4_" + i); + // NOTE: we purposefully add the strings in a non-left-to-right + // order to verify that we still place them in the right position in + // the row. + row.addString("c1", "c1_" + i); + session.apply(insert); + if (i % 50 == 0) { + session.flush(); + } + } + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(100, rowStrings.size()); + assertEquals( + "STRING key=key_03, STRING c1=c1_3, STRING c2=c2_3, STRING c3=c3_3, STRING c4=c4_3", + rowStrings.get(3)); + assertEquals( + "STRING key=key_04, STRING c1=c1_4, STRING c2=c2_4, STRING c3=NULL, STRING c4=c4_4", + rowStrings.get(4)); + + KuduScanner scanner = client.newScannerBuilder(table).build(); + + assertTrue("Scanner should have returned row", scanner.hasMoreRows()); + + RowResultIterator rows = scanner.nextRows(); + final RowResult next = rows.next(); + + // Do negative testing on string type. + try { + next.getInt("c2"); + fail("IllegalArgumentException was not thrown when accessing " + + "a string column with getInt"); + } catch (IllegalArgumentException ignored) { + // ignored + } + } + + /** + * Test to verify that we can write in and read back UTF8. 
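+ * Uses Thai characters, dingbat symbols, and a supplementary-plane emoji to cover
+ * multi-byte and surrogate-pair encodings.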
+ */ + @Test(timeout = 100000) + public void testUTF8() throws Exception { + Schema schema = createManyStringsSchema(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + KuduTable table = client.openTable(TABLE_NAME); + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", "กขฃคฅฆง"); // some thai + row.addString("c1", "✁✂✃✄✆"); // some icons + + row.addString("c2", "hello"); // some normal chars + row.addString("c4", "🐱"); // supplemental plane + KuduSession session = client.newSession(); + session.apply(insert); + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals( + "STRING key=กขฃคฅฆง, STRING c1=✁✂✃✄✆, STRING c2=hello, STRING c3=NULL, STRING c4=🐱", + rowStrings.get(0)); + } + + /** + * Test inserting and retrieving binary columns. + */ + @Test(timeout = 100000) + public void testBinaryColumns() throws Exception { + Schema schema = createSchemaWithBinaryColumns(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + byte[] testArray = new byte[] {1, 2, 3, 4, 5, 6 ,7, 8, 9}; + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + for (int i = 0; i < 100; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addBinary("key", String.format("key_%02d", i).getBytes(UTF_8)); + row.addString("c1", "✁✂✃✄✆"); + row.addDouble("c2", i); + if (i % 2 == 1) { + row.addBinary("c3", testArray); + } + session.apply(insert); + if (i % 50 == 0) { + session.flush(); + } + } + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(100, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); i++) { + StringBuilder expectedRow = new StringBuilder(); + expectedRow.append(String.format("BINARY key=\"key_%02d\", STRING c1=✁✂✃✄✆, DOUBLE c2=%.1f," + + " BINARY c3=", i, (double) i)); + if (i % 2 == 1) { + expectedRow.append(Bytes.pretty(testArray)); + } else { + expectedRow.append("NULL"); + } + assertEquals(expectedRow.toString(), rowStrings.get(i)); + } + } + + /** + * Test inserting and retrieving timestamp columns. 
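+ * Each key is derived from System.currentTimeMillis() expressed in microseconds,
+ * with a spin loop guaranteeing that consecutive keys are distinct.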
+ */ + @Test(timeout = 100000) + public void testTimestampColumns() throws Exception { + Schema schema = createSchemaWithTimestampColumns(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + List timestamps = new ArrayList<>(); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + long lastTimestamp = 0; + for (int i = 0; i < 100; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + long timestamp = System.currentTimeMillis() * 1000; + while (timestamp == lastTimestamp) { + timestamp = System.currentTimeMillis() * 1000; + } + timestamps.add(timestamp); + row.addLong("key", timestamp); + if (i % 2 == 1) { + row.addLong("c1", timestamp); + } + session.apply(insert); + if (i % 50 == 0) { + session.flush(); + } + lastTimestamp = timestamp; + } + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(100, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); i++) { + StringBuilder expectedRow = new StringBuilder(); + expectedRow.append(String.format("UNIXTIME_MICROS key=%s, UNIXTIME_MICROS c1=", + TimestampUtil.timestampToString(timestamps.get(i)))); + if (i % 2 == 1) { + expectedRow.append(TimestampUtil.timestampToString(timestamps.get(i))); + } else { + expectedRow.append("NULL"); + } + assertEquals(expectedRow.toString(), rowStrings.get(i)); + } + } + + /** + * Test inserting and retrieving date columns. + */ + @Test(timeout = 100000) + public void testDateColumns() throws Exception { + Schema schema = createSchemaWithDateColumns(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + List dates = new ArrayList<>(); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + for (int i = 0; i < 100; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + dates.add(i); + Date date = DateUtil.epochDaysToSqlDate(i); + row.addDate("key", date); + if (i % 2 == 1) { + row.addDate("c1", date); + } + session.apply(insert); + if (i % 50 == 0) { + session.flush(); + } + } + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(100, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); i++) { + String sdate = DateUtil.epochDaysToDateString(dates.get(i)); + StringBuilder expectedRow = new StringBuilder(); + expectedRow.append(String.format("DATE key=%s, DATE c1=", sdate)); + if (i % 2 == 1) { + expectedRow.append(sdate); + } else { + expectedRow.append("NULL"); + } + assertEquals(expectedRow.toString(), rowStrings.get(i)); + } + } + + /** + * Test inserting and retrieving decimal columns. 
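+ * The schema pairs a DECIMAL(18, 0) key with a nullable DECIMAL(38, 0) value column,
+ * matching the precisions asserted on in the expected row strings below.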
+ */ + @Test(timeout = 100000) + public void testDecimalColumns() throws Exception { + Schema schema = createSchemaWithDecimalColumns(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + + // Verify ColumnTypeAttributes + assertEquals(DecimalUtil.MAX_DECIMAL128_PRECISION, + table.getSchema().getColumn("c1").getTypeAttributes().getPrecision()); + + for (int i = 0; i < 9; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addDecimal("key", BigDecimal.valueOf(i)); + if (i % 2 == 1) { + row.addDecimal("c1", BigDecimal.valueOf(i)); + } + session.apply(insert); + } + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(9, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); i++) { + StringBuilder expectedRow = new StringBuilder(); + expectedRow.append(String.format("DECIMAL key(18, 0)=%s, DECIMAL c1(38, 0)=", + String.valueOf(i))); + if (i % 2 == 1) { + expectedRow.append(i); + } else { + expectedRow.append("NULL"); + } + assertEquals(expectedRow.toString(), rowStrings.get(i)); + } + } + + /** + * Test creating a table with non unique primary key in the table schema. + */ + @Test(timeout = 100000) + public void testCreateTableWithNonUniquePrimaryKeys() throws Exception { + // Create a schema with non unique primary key column + Schema schema = createSchemaWithNonUniqueKey(); + assertFalse(schema.isPrimaryKeyUnique()); + // Verify auto-incrementing column is in the schema + assertTrue(schema.hasAutoIncrementingColumn()); + assertEquals(3, schema.getColumnCount()); + assertEquals(2, schema.getPrimaryKeyColumnCount()); + assertEquals(1, schema.getColumnIndex(Schema.getAutoIncrementingColumnName())); + // Create a table + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + + // Verify that the primary key is not unique, and an auto-incrementing column is + // added as key column in the position after all key columns. + schema = table.getSchema(); + assertFalse(schema.isPrimaryKeyUnique()); + assertTrue(schema.hasAutoIncrementingColumn()); + assertEquals(3, schema.getColumnCount()); + assertEquals(2, schema.getPrimaryKeyColumnCount()); + assertEquals(1, schema.getColumnIndex(Schema.getAutoIncrementingColumnName())); + assertTrue(schema.getColumn(Schema.getAutoIncrementingColumnName()).isKey()); + assertTrue(schema.getColumn( + Schema.getAutoIncrementingColumnName()).isAutoIncrementing()); + + // Insert rows into the table without assigning values for the auto-incrementing + // column. + for (int i = 0; i < 3; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt("key", i); + row.addInt("c1", i * 10); + session.apply(insert); + } + session.flush(); + + // Scan all the rows in the table with all columns. + // Verify that the auto-incrementing column is included in the rows. + List rowStrings = scanTableToStrings(table); + assertEquals(3, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); i++) { + StringBuilder expectedRow = new StringBuilder(); + expectedRow.append(String.format("INT32 key=%d, INT64 %s=%d, INT32 c1=%d", + i, Schema.getAutoIncrementingColumnName(), i + 1, i * 10)); + assertEquals(expectedRow.toString(), rowStrings.get(i)); + } + + // Update "c1" column of the first row with "key" and auto-incrementing columns. 
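+ // The auto-incrementing column is part of the effective primary key here, so the
+ // value it was assigned on insert (1 for the first row, per the scan above) must be
+ // supplied together with "key" to address the row.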
+ Update update = table.newUpdate(); + PartialRow row = update.getRow(); + row.addInt(schema.getColumnByIndex(0).getName(), 0); + row.addLong(schema.getColumnByIndex(1).getName(), 1); + row.addInt(schema.getColumnByIndex(2).getName(), 100); + session.apply(update); + session.flush(); + + // Scan all the rows in the table without the auto-incrementing column. + // Verify that "c1" column of the first row is updated. + KuduScanner.KuduScannerBuilder scanBuilder = client.newScannerBuilder(table); + KuduScanner scanner = + scanBuilder.setProjectedColumnNames(Lists.newArrayList("key", "c1")).build(); + rowStrings.clear(); + for (RowResult r : scanner) { + rowStrings.add(r.rowToString()); + } + Collections.sort(rowStrings); + assertEquals(3, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); i++) { + StringBuilder expectedRow = new StringBuilder(); + if (i == 0) { + expectedRow.append(String.format("INT32 key=0, INT32 c1=100")); + } else { + expectedRow.append(String.format("INT32 key=%d, INT32 c1=%d", i, i * 10)); + } + assertEquals(expectedRow.toString(), rowStrings.get(i)); + } + + // Upsert rows into the table after assigning values for the auto-incrementing + // column. The first three rows will be applied as updates and the next three as + // inserts. + for (int i = 0; i < 6; i++) { + Upsert upsert = table.newUpsert(); + row = upsert.getRow(); + row.addInt("key", i); + row.addLong(Schema.getAutoIncrementingColumnName(), i + 1); + row.addInt("c1", i * 20); + session.apply(upsert); + } + session.flush(); + + // Scan all the rows in the table with all columns. + // Verify that the auto-incrementing column is included in the rows. + rowStrings = scanTableToStrings(table); + assertEquals(6, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); i++) { + String expectedRow = String.format("INT32 key=%d, INT64 %s=%d, INT32 c1=%d", + i, Schema.getAutoIncrementingColumnName(), i + 1, i * 20); + assertEquals(expectedRow, rowStrings.get(i)); + } + + // Delete the first row with "key" and auto-incrementing columns. + // Verify that number of rows is decreased by 1. + Delete delete = table.newDelete(); + row = delete.getRow(); + row.addInt(schema.getColumnByIndex(0).getName(), 0); + row.addLong(schema.getColumnByIndex(1).getName(), 1); + session.apply(delete); + session.flush(); + assertEquals(5, countRowsInScan(client.newScannerBuilder(table).build())); + + // Check that we can delete the table. + client.deleteTable(TABLE_NAME); + } + + /** + * Test operations for table with auto-incrementing column. 
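+ * Verifies which write and alter-table operations are allowed on the reserved
+ * auto-incrementing column and which are rejected.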
+ */ + @Test(timeout = 100000) + public void testTableWithAutoIncrementingColumn() throws Exception { + // Create a schema with non unique primary key column + Schema schema = createSchemaWithNonUniqueKey(); + assertFalse(schema.isPrimaryKeyUnique()); + // Verify auto-incrementing column is in the schema + assertTrue(schema.hasAutoIncrementingColumn()); + assertEquals(3, schema.getColumnCount()); + assertEquals(2, schema.getPrimaryKeyColumnCount()); + // Create a table + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + final KuduSession session = client.newSession(); + KuduTable table = client.openTable(TABLE_NAME); + schema = table.getSchema(); + assertTrue(schema.hasAutoIncrementingColumn()); + + // Verify that UPSERT is allowed for table with auto-incrementing column + Upsert upsert = table.newUpsert(); + PartialRow rowUpsert = upsert.getRow(); + rowUpsert.addInt("key", 0); + rowUpsert.addLong(Schema.getAutoIncrementingColumnName(), 1); + rowUpsert.addInt("c1", 10); + session.apply(upsert); + + // Verify that UPSERT_IGNORE is allowed for table with auto-incrementing column + UpsertIgnore upsertIgnore = table.newUpsertIgnore(); + PartialRow rowUpsertIgnore = upsertIgnore.getRow(); + rowUpsertIgnore.addInt("key", 1); + rowUpsertIgnore.addLong(Schema.getAutoIncrementingColumnName(), 2); + rowUpsertIgnore.addInt("c1", 20); + session.apply(upsertIgnore); + + // Change desired block size for auto-incrementing column + client.alterTable(TABLE_NAME, new AlterTableOptions().changeDesiredBlockSize( + Schema.getAutoIncrementingColumnName(), 1)); + // Change encoding for auto-incrementing column + client.alterTable(TABLE_NAME, new AlterTableOptions().changeEncoding( + Schema.getAutoIncrementingColumnName(), ColumnSchema.Encoding.PLAIN_ENCODING)); + // Change compression algorithm for auto-incrementing column + client.alterTable(TABLE_NAME, new AlterTableOptions().changeCompressionAlgorithm( + Schema.getAutoIncrementingColumnName(), ColumnSchema.CompressionAlgorithm.NO_COMPRESSION)); + session.flush(); + + // Verify that auto-incrementing column value cannot be specified in an INSERT operation. + try { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt("key", 1); + row.addLong(Schema.getAutoIncrementingColumnName(), 1); + row.addInt("c1", 10); + session.apply(insert); + fail("INSERT on table with auto-incrementing column set"); + } catch (KuduException ex) { + assertTrue(ex.getMessage().contains("Auto-Incrementing column should not " + + "be specified for INSERT operation")); + } + + // Verify that auto-incrementing column value cannot be specified in an INSERT_IGNORE operation. 
+ try { + InsertIgnore insertIgnore = table.newInsertIgnore(); + PartialRow row = insertIgnore.getRow(); + row.addInt("key", 1); + row.addLong(Schema.getAutoIncrementingColumnName(), 1); + row.addInt("c1", 10); + session.apply(insertIgnore); + fail("INSERT on table with auto-incrementing column set"); + } catch (KuduException ex) { + assertTrue(ex.getMessage().contains("Auto-Incrementing column should not " + + "be specified for INSERT operation")); + } + // Verify that auto-incrementing column cannot be added + try { + client.alterTable(TABLE_NAME, new AlterTableOptions().addColumn( + Schema.getAutoIncrementingColumnName(), Schema.getAutoIncrementingColumnType(), 0)); + fail("Add auto-incrementing column"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Column name " + + Schema.getAutoIncrementingColumnName() + " is reserved by Kudu engine")); + } + try { + client.alterTable(TABLE_NAME, new AlterTableOptions().addColumn( + new ColumnSchema.AutoIncrementingColumnSchemaBuilder().build())); + fail("Add auto-incrementing column"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Column name " + + Schema.getAutoIncrementingColumnName() + " is reserved by Kudu engine")); + } + + // Verify that auto-incrementing column cannot be removed + try { + client.alterTable(TABLE_NAME, new AlterTableOptions().dropColumn( + Schema.getAutoIncrementingColumnName())); + fail("Drop auto-incrementing column"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Cannot remove auto-incrementing column " + + Schema.getAutoIncrementingColumnName())); + } + + // Verify that auto-incrementing column cannot be renamed + try { + client.alterTable(TABLE_NAME, new AlterTableOptions().renameColumn( + Schema.getAutoIncrementingColumnName(), "new_auto_incrementing")); + fail("Rename auto-incrementing column"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Cannot rename auto-incrementing column " + + Schema.getAutoIncrementingColumnName())); + } + + // Verify that auto-incrementing column cannot be changed by removing default + try { + client.alterTable(TABLE_NAME, new AlterTableOptions().removeDefault( + Schema.getAutoIncrementingColumnName())); + fail("Remove default value for auto-incrementing column"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Auto-incrementing column " + + Schema.getAutoIncrementingColumnName() + " does not have default value")); + } + + // Verify that auto-incrementing column cannot be changed with default value + try { + client.alterTable(TABLE_NAME, new AlterTableOptions().changeDefault( + Schema.getAutoIncrementingColumnName(), 0)); + fail("Change default value for auto-incrementing column"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Cannot set default value for " + + "auto-incrementing column " + Schema.getAutoIncrementingColumnName())); + } + + // Verify that auto-incrementing column cannot be changed for its immutable + try { + client.alterTable(TABLE_NAME, new AlterTableOptions().changeImmutable( + Schema.getAutoIncrementingColumnName(), true)); + fail("Change immutable for auto-incrementing column"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Cannot change immutable for " + + "auto-incrementing column " + Schema.getAutoIncrementingColumnName())); + } + + client.deleteTable(TABLE_NAME); + } + + /** + * Test inserting and retrieving rows from a table that has a range 
partition + * with custom hash schema. + */ + @Test(timeout = 100000) + public void testRangeWithCustomHashSchema() throws Exception { + List cols = new ArrayList<>(); + cols.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT64).key(true).build()); + cols.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32).nullable(true).build()); + Schema schema = new Schema(cols); + + CreateTableOptions options = new CreateTableOptions(); + options.setRangePartitionColumns(ImmutableList.of("c0")); + options.addHashPartitions(ImmutableList.of("c0"), 2); + + // Add range partition with table-wide hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addLong("c0", -100); + PartialRow upper = schema.newPartialRow(); + upper.addLong("c0", 100); + options.addRangePartition(lower, upper); + } + + // Add a partition with custom hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addLong("c0", 100); + PartialRow upper = schema.newPartialRow(); + upper.addLong("c0", 200); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("c0"), 5, 0); + options.addRangePartition(rangePartition); + } + + client.createTable(TABLE_NAME, schema, options); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + KuduTable table = client.openTable(TABLE_NAME); + + // Check the range with the table-wide hash schema. + { + for (int i = 0; i < 10; ++i) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addLong("c0", i); + row.addInt("c1", 1000 * i); + session.apply(insert); + } + + // Scan all the rows in the table. + List rowStringsAll = scanTableToStrings(table); + assertEquals(10, rowStringsAll.size()); + + // Now scan the rows that are in the range with the table-wide hash schema. + List rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("c0"), GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(schema.getColumn("c0"), LESS, 100)); + assertEquals(10, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); ++i) { + StringBuilder expectedRow = new StringBuilder(); + expectedRow.append(String.format("INT64 c0=%d, INT32 c1=%d", i, 1000 * i)); + assertEquals(expectedRow.toString(), rowStrings.get(i)); + } + } + + // Check the range with the custom hash schema. + { + for (int i = 100; i < 110; ++i) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addLong("c0", i); + row.addInt("c1", 2 * i); + session.apply(insert); + } + + // Scan all the rows in the table. + List rowStringsAll = scanTableToStrings(table); + assertEquals(20, rowStringsAll.size()); + + // Now scan the rows that are in the range with the custom hash schema. + List rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("c0"), GREATER_EQUAL, 100)); + assertEquals(10, rowStrings.size()); + for (int i = 0; i < rowStrings.size(); ++i) { + StringBuilder expectedRow = new StringBuilder(); + expectedRow.append(String.format("INT64 c0=%d, INT32 c1=%d", + i + 100, 2 * (i + 100))); + assertEquals(expectedRow.toString(), rowStrings.get(i)); + } + } + } + + /** + * Test scanning with limits. 
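+ * Covers rejection of non-positive limits as well as limits below, at, and above
+ * the row count, for both the synchronous and asynchronous scanners.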
+ */ + @Test + public void testScanWithLimit() throws Exception { + AsyncKuduClient asyncClient = harness.getAsyncClient(); + client.createTable(TABLE_NAME, basicSchema, getBasicTableOptionsWithNonCoveredRange()); + KuduTable table = client.openTable(TABLE_NAME); + KuduSession session = client.newSession(); + int numRows = 100; + for (int key = 0; key < numRows; key++) { + session.apply(createBasicSchemaInsert(table, key)); + } + + // Test with some non-positive limits, expecting to raise an exception. + int[] nonPositives = { -1, 0 }; + for (int limit : nonPositives) { + try { + client.newScannerBuilder(table).limit(limit).build(); + fail(); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Need a strictly positive number")); + } + } + + // Test with a limit and ensure we get the expected number of rows. + int[] limits = { numRows - 1, numRows, numRows + 1 }; + for (int limit : limits) { + KuduScanner scanner = client.newScannerBuilder(table) + .limit(limit) + .build(); + int count = 0; + while (scanner.hasMoreRows()) { + count += scanner.nextRows().getNumRows(); + } + assertEquals(String.format("Limit %d returned %d/%d rows", limit, count, numRows), + Math.min(numRows, limit), count); + } + + // Now test with limits for async scanners. + for (int limit : limits) { + AsyncKuduScanner scanner = new AsyncKuduScanner.AsyncKuduScannerBuilder(asyncClient, table) + .limit(limit) + .build(); + assertEquals(Math.min(limit, numRows), countRowsInScan(scanner)); + } + } + + /** + * Test scanning with predicates. + */ + @Test + public void testScanWithPredicates() throws Exception { + Schema schema = createManyStringsSchema(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + KuduTable table = client.openTable(TABLE_NAME); + for (int i = 0; i < 100; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", String.format("key_%02d", i)); + row.addString("c1", "c1_" + i); + row.addString("c2", "c2_" + i); + if (i % 2 == 0) { + row.addString("c3", "c3_" + i); + } + session.apply(insert); + } + session.flush(); + + assertEquals(100, scanTableToStrings(table).size()); + assertEquals(50, scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER_EQUAL, "key_50") + ).size()); + assertEquals(25, scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER, "key_74") + ).size()); + assertEquals(25, scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER, "key_24"), + KuduPredicate.newComparisonPredicate(schema.getColumn("c1"), LESS_EQUAL, "c1_49") + ).size()); + assertEquals(50, scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER, "key_24"), + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER_EQUAL, "key_50") + ).size()); + assertEquals(0, scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("c1"), GREATER, "c1_30"), + KuduPredicate.newComparisonPredicate(schema.getColumn("c2"), LESS, "c2_20") + ).size()); + assertEquals(0, scanTableToStrings(table, + // Short circuit scan + KuduPredicate.newComparisonPredicate(schema.getColumn("c2"), GREATER, "c2_30"), + KuduPredicate.newComparisonPredicate(schema.getColumn("c2"), LESS, "c2_20") + ).size()); + + // IS NOT NULL + assertEquals(100, 
scanTableToStrings(table, + KuduPredicate.newIsNotNullPredicate(schema.getColumn("c1")), + KuduPredicate.newIsNotNullPredicate(schema.getColumn("key")) + ).size()); + assertEquals(50, scanTableToStrings(table, + KuduPredicate.newIsNotNullPredicate(schema.getColumn("c3")) + ).size()); + + // IS NULL + assertEquals(0, scanTableToStrings(table, + KuduPredicate.newIsNullPredicate(schema.getColumn("c2")), + KuduPredicate.newIsNullPredicate(schema.getColumn("key")) + ).size()); + assertEquals(50, scanTableToStrings(table, + KuduPredicate.newIsNullPredicate(schema.getColumn("c3")) + ).size()); + + // IN list + assertEquals(3, scanTableToStrings(table, + KuduPredicate.newInListPredicate(schema.getColumn("key"), + ImmutableList.of("key_30", "key_01", "invalid", "key_99")) + ).size()); + assertEquals(3, scanTableToStrings(table, + KuduPredicate.newInListPredicate(schema.getColumn("c2"), + ImmutableList.of("c2_30", "c2_1", "invalid", "c2_99")) + ).size()); + assertEquals(2, scanTableToStrings(table, + KuduPredicate.newInListPredicate(schema.getColumn("c2"), + ImmutableList.of("c2_30", "c2_1", "invalid", "c2_99")), + KuduPredicate.newIsNotNullPredicate(schema.getColumn("c2")), + KuduPredicate.newInListPredicate(schema.getColumn("key"), + ImmutableList.of("key_30", "key_45", "invalid", "key_99")) + ).size()); + } + + @Test + public void testGetAuthnToken() throws Exception { + byte[] token = asyncClient.exportAuthenticationCredentials().join(); + assertNotNull(token); + } + + + /** + * Counts the rows in a table between two optional bounds. + * @param table the table to scan, must have the basic schema + * @param lowerBound an optional lower bound key + * @param upperBound an optional upper bound key + * @return the row count + * @throws Exception on error + */ + private int countRowsForTestScanNonCoveredTable(KuduTable table, + Integer lowerBound, + Integer upperBound) throws Exception { + + KuduScanner.KuduScannerBuilder scanBuilder = client.newScannerBuilder(table); + if (lowerBound != null) { + PartialRow bound = basicSchema.newPartialRow(); + bound.addInt(0, lowerBound); + scanBuilder.lowerBound(bound); + } + if (upperBound != null) { + PartialRow bound = basicSchema.newPartialRow(); + bound.addInt(0, upperBound); + scanBuilder.exclusiveUpperBound(bound); + } + + KuduScanner scanner = scanBuilder.build(); + int count = 0; + while (scanner.hasMoreRows()) { + count += scanner.nextRows().getNumRows(); + } + return count; + } + + /** + * Tests scanning a table with non-covering range partitions. 
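+ * Rows are written to two disjoint key ranges ([0, 100) and [200, 300)) and then
+ * counted under various bounds using the helper above.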
+ */ + @Test(timeout = 100000) + public void testScanNonCoveredTable() throws Exception { + client.createTable(TABLE_NAME, basicSchema, getBasicTableOptionsWithNonCoveredRange()); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + KuduTable table = client.openTable(TABLE_NAME); + + for (int key = 0; key < 100; key++) { + session.apply(createBasicSchemaInsert(table, key)); + } + for (int key = 200; key < 300; key++) { + session.apply(createBasicSchemaInsert(table, key)); + } + session.flush(); + assertEquals(0, session.countPendingErrors()); + + assertEquals(200, countRowsForTestScanNonCoveredTable(table, null, null)); + assertEquals(100, countRowsForTestScanNonCoveredTable(table, null, 200)); + assertEquals(0, countRowsForTestScanNonCoveredTable(table, null, -1)); + assertEquals(0, countRowsForTestScanNonCoveredTable(table, 120, 180)); + assertEquals(0, countRowsForTestScanNonCoveredTable(table, 300, null)); + } + + /** + * Creates a local aclient that we auto-close while buffering one row, then makes sure that after + * closing that we can read the row. + */ + @Test(timeout = 100000) + public void testAutoClose() throws Exception { + try (KuduClient localClient = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build()) { + localClient.createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); + KuduTable table = localClient.openTable(TABLE_NAME); + KuduSession session = localClient.newSession(); + + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + Insert insert = createBasicSchemaInsert(table, 0); + session.apply(insert); + } + + KuduTable table = client.openTable(TABLE_NAME); + AsyncKuduScanner scanner = + new AsyncKuduScanner.AsyncKuduScannerBuilder(asyncClient, table).build(); + assertEquals(1, countRowsInScan(scanner)); + } + + /** + * Regression test for some log spew which occurred in short-lived client instances which + * had outbound connections. + */ + @Test(timeout = 100000) + public void testCloseShortlyAfterOpen() throws Exception { + CapturingLogAppender cla = new CapturingLogAppender(); + try (Closeable c = cla.attach()) { + try (KuduClient localClient = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build()) { + // Force the client to connect to the masters. + localClient.exportAuthenticationCredentials(); + } + // Wait a little for exceptions to come in from threads that don't get + // synchronously joined by client.close(). + Thread.sleep(500); + } + // Ensure there is no log spew due to an unexpected lost connection. + assertFalse(cla.getAppendedText(), cla.getAppendedText().contains("Exception")); + } + + /** + * Test that, if the masters are down when we attempt to connect, we don't end up + * logging any nonsensical stack traces including Netty internals. + */ + @Test(timeout = 100000) + public void testNoLogSpewOnConnectionRefused() throws Exception { + CapturingLogAppender cla = new CapturingLogAppender(); + try (Closeable c = cla.attach()) { + harness.killAllMasterServers(); + try (KuduClient localClient = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build()) { + // Force the client to connect to the masters. + localClient.exportAuthenticationCredentials(); + fail("Should have failed to connect."); + } catch (NonRecoverableException e) { + assertTrue("Bad exception string: " + e.getMessage(), + e.getMessage().matches(".*Master config .+ has no leader. 
" + + "Exceptions received:.*Connection refused.*Connection refused" + + ".*Connection refused.*")); + } + } finally { + harness.startAllMasterServers(); + } + // Ensure there is no log spew due to an unexpected lost connection. + String logText = cla.getAppendedText(); + assertFalse("Should not claim to have lost a connection in the log", + logText.contains("lost connection to peer")); + assertFalse("Should not have netty spew in log", + logText.contains("socket.nio.AbstractNioSelector")); + } + + @Test(timeout = 100000) + public void testCustomNioExecutor() throws Exception { + long startTime = System.nanoTime(); + try (KuduClient localClient = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()) + .nioExecutors(Executors.newFixedThreadPool(1), + Executors.newFixedThreadPool(2)) + .bossCount(1) + .workerCount(2) + .build()) { + long buildTime = (System.nanoTime() - startTime) / 1000000000L; + assertTrue("Building KuduClient is slow, maybe netty get stuck", buildTime < 3); + localClient.createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); + Thread[] threads = new Thread[4]; + for (int t = 0; t < 4; t++) { + final int id = t; + threads[t] = new Thread(new Runnable() { + @Override + public void run() { + try { + KuduTable table = localClient.openTable(TABLE_NAME); + KuduSession session = localClient.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + for (int i = 0; i < 100; i++) { + Insert insert = createBasicSchemaInsert(table, id * 100 + i); + session.apply(insert); + } + session.close(); + } catch (Exception e) { + fail("insert thread should not throw exception: " + e); + } + } + }); + threads[t].start(); + } + for (int t = 0; t < 4; t++) { + threads[t].join(); + } + } + } + + @Test(expected = IllegalArgumentException.class) + public void testNoDefaultPartitioning() throws Exception { + client.createTable(TABLE_NAME, basicSchema, new CreateTableOptions()); + } + + @Test(timeout = 100000) + public void testOpenTableClearsNonCoveringRangePartitions() throws KuduException { + CreateTableOptions options = getBasicCreateTableOptions(); + PartialRow lower = basicSchema.newPartialRow(); + PartialRow upper = basicSchema.newPartialRow(); + lower.addInt("key", 0); + upper.addInt("key", 1); + options.addRangePartition(lower, upper); + + client.createTable(TABLE_NAME, basicSchema, options); + KuduTable table = client.openTable(TABLE_NAME); + + // Count the number of tablets. + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + List tokens = tokenBuilder.build(); + assertEquals(1, tokens.size()); + + // Add a range partition with a separate client. The new client is necessary + // in order to avoid clearing the meta cache as part of the alter operation. + try (KuduClient alterClient = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()) + .defaultAdminOperationTimeoutMs(KuduTestHarness.DEFAULT_SLEEP) + .build()) { + lower = basicSchema.newPartialRow(); + upper = basicSchema.newPartialRow(); + lower.addInt("key", 1); + AlterTableOptions alter = new AlterTableOptions(); + alter.addRangePartition(lower, upper); + alterClient.alterTable(TABLE_NAME, alter); + } + + // Count the number of tablets. The result should still be the same, since + // the new tablet is still cached as a non-covered range. + tokenBuilder = client.newScanTokenBuilder(table); + tokens = tokenBuilder.build(); + assertEquals(1, tokens.size()); + + // Reopen the table and count the tablets again. 
The new tablet should now show up. + table = client.openTable(TABLE_NAME); + tokenBuilder = client.newScanTokenBuilder(table); + tokens = tokenBuilder.build(); + assertEquals(2, tokens.size()); + } + + @Test(timeout = 100000) + public void testCreateTableWithConcurrentInsert() throws Exception { + KuduTable table = client.createTable( + TABLE_NAME, createManyStringsSchema(), getBasicCreateTableOptions().setWait(false)); + + // Insert a row. + // + // It's very likely that the tablets are still being created, but the client + // should transparently retry the insert (and associated master lookup) + // until the operation succeeds. + Insert insert = table.newInsert(); + insert.getRow().addString("key", "key_0"); + insert.getRow().addString("c1", "c1_0"); + insert.getRow().addString("c2", "c2_0"); + KuduSession session = client.newSession(); + OperationResponse resp = session.apply(insert); + assertFalse(resp.hasRowError()); + + // This won't do anything useful (i.e. if the insert succeeds, we know the + // table has been created), but it's here for additional code coverage. + assertTrue(client.isCreateTableDone(TABLE_NAME)); + } + + @Test(timeout = 100000) + public void testCreateTableWithConcurrentAlter() throws Exception { + // Kick off an asynchronous table creation. + Deferred d = asyncClient.createTable(TABLE_NAME, + createManyStringsSchema(), getBasicCreateTableOptions()); + + // Rename the table that's being created to make sure it doesn't interfere + // with the "wait for all tablets to be created" behavior of createTable(). + // + // We have to retry this in a loop because we might run before the table + // actually exists. + while (true) { + try { + client.alterTable(TABLE_NAME, + new AlterTableOptions().renameTable("foo")); + break; + } catch (KuduException e) { + if (!e.getStatus().isNotFound()) { + throw e; + } + } + } + + // If createTable() was disrupted by the alterTable(), this will throw. + d.join(); + } + + // This is a test that verifies, when multiple clients run + // simultaneously, a client can get read-your-writes and + // read-your-reads session guarantees using READ_YOUR_WRITES + // scan mode, from leader replica. In this test writes are + // performed in AUTO_FLUSH_SYNC (single operation) flush modes. + @Test(timeout = 100000) + public void testReadYourWritesSyncLeaderReplica() throws Exception { + readYourWrites(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC, + ReplicaSelection.LEADER_ONLY); + } + + // Similar test as above but scan from the closest replica. + @Test(timeout = 100000) + public void testReadYourWritesSyncClosestReplica() throws Exception { + readYourWrites(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC, + ReplicaSelection.CLOSEST_REPLICA); + } + + // Similar to testReadYourWritesSyncLeaderReplica, but in this + // test writes are performed in MANUAL_FLUSH (batches) flush modes. + @Test(timeout = 100000) + public void testReadYourWritesBatchLeaderReplica() throws Exception { + readYourWrites(SessionConfiguration.FlushMode.MANUAL_FLUSH, + ReplicaSelection.LEADER_ONLY); + } + + // Similar test as above but scan from the closest replica. 
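+ // With CLOSEST_REPLICA the scan may be served by a non-leader replica, so this run
+ // also checks that READ_YOUR_WRITES holds when reads are not pinned to the leader;
+ // the shared readYourWrites() helper below asserts the observed row count never decreases.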
+ @Test(timeout = 100000) + public void testReadYourWritesBatchClosestReplica() throws Exception { + readYourWrites(SessionConfiguration.FlushMode.MANUAL_FLUSH, + ReplicaSelection.CLOSEST_REPLICA); + } + + private void readYourWrites(final SessionConfiguration.FlushMode flushMode, + final ReplicaSelection replicaSelection) + throws Exception { + Schema schema = createManyStringsSchema(); + client.createTable(TABLE_NAME, schema, getBasicCreateTableOptions()); + + final int tasksNum = 4; + List> callables = new ArrayList<>(); + for (int t = 0; t < tasksNum; t++) { + Callable callable = new Callable() { + @Override + public Void call() throws Exception { + // Create a new client. + AsyncKuduClient asyncKuduClient = new AsyncKuduClient + .AsyncKuduClientBuilder(harness.getMasterAddressesAsString()) + .defaultAdminOperationTimeoutMs(KuduTestHarness.DEFAULT_SLEEP) + .build(); + // From the same client continuously performs inserts to a tablet + // in the given flush mode. + try (KuduClient kuduClient = asyncKuduClient.syncClient()) { + KuduSession session = kuduClient.newSession(); + session.setFlushMode(flushMode); + KuduTable table = kuduClient.openTable(TABLE_NAME); + for (int i = 0; i < 3; i++) { + for (int j = 100 * i; j < 100 * (i + 1); j++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", String.format("key_%02d", j)); + row.addString("c1", "c1_" + j); + row.addString("c2", "c2_" + j); + row.addString("c3", "c3_" + j); + session.apply(insert); + } + session.flush(); + + // Perform a bunch of READ_YOUR_WRITES scans to all the replicas + // that count the rows. And verify that the count of the rows + // never go down from what previously observed, to ensure subsequent + // reads will not "go back in time" regarding writes that other + // clients have done. + for (int k = 0; k < 3; k++) { + AsyncKuduScanner scanner = asyncKuduClient.newScannerBuilder(table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .replicaSelection(replicaSelection) + .build(); + KuduScanner syncScanner = new KuduScanner(scanner); + long preTs = asyncKuduClient.getLastPropagatedTimestamp(); + assertNotEquals(AsyncKuduClient.NO_TIMESTAMP, preTs); + + long rowCount = countRowsInScan(syncScanner); + long expectedCount = 100L * (i + 1); + assertTrue(expectedCount <= rowCount); + + // After the scan, verify that the chosen snapshot timestamp is + // returned from the server and it is larger than the previous + // propagated timestamp. + assertNotEquals(AsyncKuduClient.NO_TIMESTAMP, scanner.getSnapshotTimestamp()); + assertTrue(preTs < scanner.getSnapshotTimestamp()); + syncScanner.close(); + } + } + } + return null; + } + }; + callables.add(callable); + } + ExecutorService executor = Executors.newFixedThreadPool(tasksNum); + List> futures = executor.invokeAll(callables); + + // Waits for the spawn tasks to complete, and then retrieves the results. + // Any exceptions or assertion errors in the spawn tasks will be thrown here. + for (Future future : futures) { + future.get(); + } + } + + private void runTestCallDuringLeaderElection(String clientMethodName) throws Exception { + // This bit of reflection helps us avoid duplicating test code. + Method methodToInvoke = KuduClient.class.getMethod(clientMethodName); + + for (int i = 0; i < 5; i++) { + try (KuduClient cl = new KuduClient.KuduClientBuilder( + harness.getMasterAddressesAsString()).build()) { + harness.restartLeaderMaster(); + + // There's a good chance that this executes while there's no leader + // master. 
It should retry until the leader election completes and a new + // leader master is elected. + methodToInvoke.invoke(cl); + } + } + + // With all masters down, exportAuthenticationCredentials() should time out. + harness.killAllMasterServers(); + try (KuduClient cl = new KuduClient.KuduClientBuilder( + harness.getMasterAddressesAsString()) + .defaultAdminOperationTimeoutMs(5000) // speed up the test + .build()) { + try { + methodToInvoke.invoke(cl); + fail(); + } catch (InvocationTargetException ex) { + assertTrue(ex.getTargetException() instanceof KuduException); + KuduException realEx = (KuduException) ex.getTargetException(); + assertTrue(realEx.getStatus().isTimedOut()); + } + } + } + + @Test(timeout = 100000) + public void testExportAuthenticationCredentialsDuringLeaderElection() throws Exception { + runTestCallDuringLeaderElection("exportAuthenticationCredentials"); + } + + @Test(timeout = 100000) + public void testGetHiveMetastoreConfigDuringLeaderElection() throws Exception { + runTestCallDuringLeaderElection("getHiveMetastoreConfig"); + } + + /** + * Test assignment of a location to the client. + */ + @Test(timeout = 100000) + public void testClientLocationNoLocation() throws Exception { + // Do something that will cause the client to connect to the cluster. + client.listTabletServers(); + assertEquals("", client.getLocationString()); + } + + @Test(timeout = 100000) + @LocationConfig(locations = { + "/L0:6", // 3 masters, 1 client, 3 tablet servers: 3 * 1 + 3 = 6. + }) + @MasterServerConfig(flags = { + "--master_client_location_assignment_enabled=true", + }) + public void testClientLocation() throws Exception { + // Do something that will cause the client to connect to the cluster. + client.listTabletServers(); + assertEquals("/L0", client.getLocationString()); + } + + @Test(timeout = 100000) + public void testClusterId() throws Exception { + assertTrue(client.getClusterId().isEmpty()); + // Do something that will cause the client to connect to the cluster. + client.listTabletServers(); + assertFalse(client.getClusterId().isEmpty()); + } + + @Test(timeout = 100000) + public void testSessionOnceClosed() throws Exception { + client.createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); + KuduTable table = client.openTable(TABLE_NAME); + KuduSession session = client.newSession(); + + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + Insert insert = createBasicSchemaInsert(table, 0); + session.apply(insert); + session.close(); + assertTrue(session.isClosed()); + + insert = createBasicSchemaInsert(table, 1); + CapturingLogAppender cla = new CapturingLogAppender(); + try (Closeable c = cla.attach()) { + session.apply(insert); + } + String loggedText = cla.getAppendedText(); + assertTrue("Missing warning:\n" + loggedText, + loggedText.contains("this is unsafe")); + } + + @Test(timeout = 100000) + public void testSchemaDriftPattern() throws Exception { + KuduTable table = client.createTable( + TABLE_NAME, createManyStringsSchema(), getBasicCreateTableOptions().setWait(false)); + KuduSession session = client.newSession(); + + // Insert a row. + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", "key_0"); + row.addString("c1", "c1_0"); + row.addString("c2", "c2_0"); + row.addString("c3", "c3_0"); + row.addString("c4", "c4_0"); + OperationResponse resp = session.apply(insert); + assertFalse(resp.hasRowError()); + + // Insert a row with an extra column. 
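+ // Column "c5" does not exist in the table yet, so the first apply() below is expected
+ // to fail with an "Unknown column" error; the catch block then adds the column via
+ // alterTable(), re-opens the table, and retries. That retry loop is the schema drift
+ // pattern this test exercises.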
+ boolean retried = false; + while (true) { + try { + Insert insertExtra = table.newInsert(); + PartialRow rowExtra = insertExtra.getRow(); + rowExtra.addString("key", "key_1"); + rowExtra.addString("c1", "c1_1"); + rowExtra.addString("c2", "c2_1"); + rowExtra.addString("c3", "c2_1"); + rowExtra.addString("c4", "c2_1"); + rowExtra.addString("c5", "c5_1"); + OperationResponse respExtra = session.apply(insertExtra); + assertFalse(respExtra.hasRowError()); + break; + } catch (IllegalArgumentException e) { + if (retried) { + throw e; + } + // Add the missing column and retry. + if (e.getMessage().contains("Unknown column")) { + client.alterTable(TABLE_NAME, new AlterTableOptions() + .addNullableColumn("c5", Type.STRING)); + // We need to re-open the table to ensure it has the new schema. + table = client.openTable(TABLE_NAME); + retried = true; + } else { + throw e; + } + } + } + // Make sure we actually retried. + assertTrue(retried); + + // Insert a row with the old schema. + Insert insertOld = table.newInsert(); + PartialRow rowOld = insertOld.getRow(); + rowOld.addString("key", "key_3"); + rowOld.addString("c1", "c1_3"); + rowOld.addString("c2", "c2_3"); + rowOld.addString("c3", "c3_3"); + rowOld.addString("c4", "c4_3"); + OperationResponse respOld = session.apply(insertOld); + assertFalse(respOld.hasRowError()); + } + + /** + * This is a test scenario to reproduce conditions described in KUDU-3277. + * The scenario was failing before the fix: + * ** 'java.lang.AssertionError: This Deferred was already called' was + * encountered multiple times with the stack exactly as described in + * KUDU-3277 + * ** some flusher threads were unable to join since KuduSession.flush() + * would hang (i.e. would not return) + */ + @MasterServerConfig(flags = { + // A shorter TTL for tablet locations is necessary to induce more frequent + // calls to TabletLookupCB.queueBuffer(). + "--table_locations_ttl_ms=500", + }) + @Test(timeout = 100000) + public void testConcurrentFlush() throws Exception { + // This is a very intensive and stressful test scenario, so run it only + // against Kudu binaries built without sanitizers. 
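+ // The assumeTrue() below skips the scenario when the Kudu binaries under test were
+ // built with a sanitizer, since the test spawns many concurrent sessions plus one
+ // flusher and one inserter thread per session.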
+ assumeTrue("this scenario is to run against non-sanitized binaries only", + KuduBinaryInfo.getSanitizerType() == KuduBinaryInfo.SanitizerType.NONE); + try { + AsyncKuduSession.injectLatencyBufferFlushCb(true); + + CreateTableOptions opts = new CreateTableOptions() + .addHashPartitions(ImmutableList.of("key"), 8) + .setRangePartitionColumns(ImmutableList.of("key")); + + Schema schema = ClientTestUtil.getBasicSchema(); + PartialRow lowerBoundA = schema.newPartialRow(); + PartialRow upperBoundA = schema.newPartialRow(); + upperBoundA.addInt("key", 0); + opts.addRangePartition(lowerBoundA, upperBoundA); + + PartialRow lowerBoundB = schema.newPartialRow(); + lowerBoundB.addInt("key", 0); + PartialRow upperBoundB = schema.newPartialRow(); + opts.addRangePartition(lowerBoundB, upperBoundB); + + KuduTable table = client.createTable(TABLE_NAME, schema, opts); + + final CountDownLatch keepRunning = new CountDownLatch(1); + final int numSessions = 50; + + List sessions = new ArrayList<>(numSessions); + for (int i = 0; i < numSessions; ++i) { + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + sessions.add(session); + } + + List flushers = new ArrayList<>(numSessions); + Random random = RandomUtils.getRandom(); + { + for (int idx = 0; idx < numSessions; ++idx) { + final int threadIdx = idx; + Thread flusher = new Thread(new Runnable() { + @Override + public void run() { + KuduSession session = sessions.get(threadIdx); + try { + while (!keepRunning.await(random.nextInt(250), TimeUnit.MILLISECONDS)) { + session.flush(); + assertEquals(0, session.countPendingErrors()); + } + } catch (Exception e) { + fail("unexpected exception: " + e); + } + } + }); + flushers.add(flusher); + } + } + + final int numRowsPerSession = 10000; + final CountDownLatch insertersCompleted = new CountDownLatch(numSessions); + List inserters = new ArrayList<>(numSessions); + { + for (int idx = 0; idx < numSessions; ++idx) { + final int threadIdx = idx; + final int keyStart = threadIdx * numRowsPerSession; + Thread inserter = new Thread(new Runnable() { + @Override + public void run() { + KuduSession session = sessions.get(threadIdx); + try { + for (int key = keyStart; key < keyStart + numRowsPerSession; ++key) { + Insert insert = ClientTestUtil.createBasicSchemaInsert(table, key); + assertNull(session.apply(insert)); + } + session.flush(); + } catch (Exception e) { + fail("unexpected exception: " + e); + } + insertersCompleted.countDown(); + } + }); + inserters.add(inserter); + } + } + + for (Thread flusher : flushers) { + flusher.start(); + } + for (Thread inserter : inserters) { + inserter.start(); + } + + // Wait for the inserter threads to finish. + insertersCompleted.await(); + // Signal the flusher threads to stop. + keepRunning.countDown(); + + for (Thread inserter : inserters) { + inserter.join(); + } + for (Thread flusher : flushers) { + flusher.join(); + } + + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + assertEquals(numSessions * numRowsPerSession, countRowsInScan(scanner)); + } finally { + AsyncKuduSession.injectLatencyBufferFlushCb(false); + } + } + + @Test(timeout = 50000) + public void testImportInvalidCert() throws Exception { + // An empty certificate to import. 
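+ // An empty byte array cannot be parsed as an X.509 certificate, so trustedCertificates()
+ // is expected to fail with a CertificateException ("Could not parse certificate").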
+ byte[] caCert = new byte[0]; + CertificateException e = assertThrows(CertificateException.class, () -> { + client.trustedCertificates(Arrays.asList(ByteString.copyFrom(caCert))); + }); + assertTrue(e.getMessage().contains("Could not parse certificate")); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduMetrics.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduMetrics.java new file mode 100644 index 0000000000..09b989d6bf --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduMetrics.java @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; + +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.noop.NoopCounter; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.KuduTestHarness; + +public class TestKuduMetrics { + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Test + public void testDisabledMetrics() throws Exception { + KuduMetrics.setEnabled(false); + + // Creating a meter results in a no-op meter that always returns 0. + Counter foo = + KuduMetrics.counter(KuduMetrics.RPC_RESPONSE_METRIC, KuduMetrics.CLIENT_ID_TAG, "foo"); + Assert.assertTrue(foo instanceof NoopCounter); + foo.increment(); + Assert.assertEquals(0, (int) foo.count()); + + // The registry doesn't have any meters. + Assert.assertEquals(0, KuduMetrics.numMetrics()); + } + + @Test + public void testClientIdFilter() throws Exception { + KuduClient c1 = new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build(); + KuduClient c2 = new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build(); + + c1.createTable("c1-table", getBasicSchema(), getBasicCreateTableOptions()); + c2.createTable("c2-table", getBasicSchema(), getBasicCreateTableOptions()); + + String c1Id = c1.getClientId(); + String c2Id = c1.getClientId(); + + int totalNumMetrics = KuduMetrics.numMetrics(); + int c1NumMetrics = KuduMetrics.numMetrics(KuduMetrics.CLIENT_ID_TAG, c1Id); + int c2NumMetrics = KuduMetrics.numMetrics(KuduMetrics.CLIENT_ID_TAG, c2Id); + + KuduMetrics.logMetrics(); // Log the metric values to help debug failures. + Assert.assertEquals(totalNumMetrics, c1NumMetrics + c2NumMetrics); + + // Disable the metrics and validate they are cleared. + KuduMetrics.setEnabled(false); + Assert.assertEquals(0, KuduMetrics.numMetrics()); + // Re-enable and verify they remain cleared. 
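+ // Re-enabling does not bring back the meters cleared above, so the registry should
+ // still report zero metrics.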
+ KuduMetrics.setEnabled(true); + Assert.assertEquals(0, KuduMetrics.numMetrics()); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPartitioner.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPartitioner.java new file mode 100644 index 0000000000..6c5c063b79 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPartitioner.java @@ -0,0 +1,210 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.ClientTestUtil.getBasicTableOptionsWithNonCoveredRange; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.Schema; +import org.apache.kudu.test.KuduTestHarness; + +public class TestKuduPartitioner { + + private KuduClient client; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + } + + @Test + public void testPartitioner() throws Exception { + // Create a table with the following 9 partitions: + // + // hash bucket + // key 0 1 2 + // ----------------- + // <3333 x x x + // 3333-6666 x x x + // >=6666 x x x + Schema basicSchema = getBasicSchema(); + final int numRanges = 3; + final int numHashPartitions = 3; + final String tableName = "TestPartitioner"; + + List splitRows = new ArrayList<>(); + for (int split : Arrays.asList(3333, 6666)) { + PartialRow row = basicSchema.newPartialRow(); + row.addInt("key", split); + splitRows.add(row); + } + + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.addHashPartitions(Collections.singletonList("key"), numHashPartitions); + createOptions.setRangePartitionColumns(Collections.singletonList("key")); + for (PartialRow row : splitRows) { + createOptions.addSplitRow(row); + } + + KuduTable table = client.createTable(tableName, basicSchema, createOptions); + Schema schema = table.getSchema(); + KuduPartitioner part = new KuduPartitioner.KuduPartitionerBuilder(table).build(); + + assertEquals(numRanges * numHashPartitions, part.numPartitions()); + + // Partition a bunch of rows, counting how many fall into each partition. 
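+ // With 3 hash buckets in each of the 3 ranges, keys 0..9999 should spread roughly
+ // evenly across the 9 partitions; the bounds checked afterwards allow about 10% skew.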
+ int numRowsToPartition = 10000; + int[] countsByPartition = new int[part.numPartitions()]; + Arrays.fill(countsByPartition, 0); + for (int i = 0; i < numRowsToPartition; i++) { + PartialRow row = schema.newPartialRow(); + row.addInt("key", i); + int partIndex = part.partitionRow(row); + countsByPartition[partIndex]++; + } + + // We don't expect a completely even division of rows into partitions, but + // we should be within 10% of that. + int expectedPerPartition = numRowsToPartition / part.numPartitions(); + int fuzziness = expectedPerPartition / 10; + int minPerPartition = expectedPerPartition - fuzziness; + int maxPerPartition = expectedPerPartition + fuzziness; + for (int i = 0; i < part.numPartitions(); i++) { + assertTrue(minPerPartition <= countsByPartition[i]); + assertTrue(maxPerPartition >= countsByPartition[i]); + } + + // Drop the first and third range partition. + AlterTableOptions alterOptions = new AlterTableOptions(); + alterOptions.dropRangePartition(basicSchema.newPartialRow(), splitRows.get(0)); + alterOptions.dropRangePartition(splitRows.get(1), basicSchema.newPartialRow()); + client.alterTable(tableName, alterOptions); + + // The existing partitioner should still return results based on the table + // state at the time it was created, and successfully return partitions + // for rows in the now-dropped range. + assertEquals(numRanges * numHashPartitions, part.numPartitions()); + PartialRow row = schema.newPartialRow(); + row.addInt("key", 1000); + assertEquals(0, part.partitionRow(row)); + + // If we recreate the partitioner, it should get the new partitioning info. + part = new KuduPartitioner.KuduPartitionerBuilder(table).build(); + assertEquals(numHashPartitions, part.numPartitions()); + } + + @Test + public void testPartitionerNonCoveredRange() throws Exception { + final Schema basicSchema = getBasicSchema(); + final int numHashPartitions = 3; + final String tableName = "TestPartitionerNonCoveredRange"; + + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.addHashPartitions(Collections.singletonList("key"), numHashPartitions); + createOptions.setRangePartitionColumns(Collections.singletonList("key")); + // Cover a range where 1000 <= key < 2000 + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt("key", 1000); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt("key", 2000); + createOptions.addRangePartition(lower, upper); + + KuduTable table = client.createTable(tableName, basicSchema, createOptions); + Schema schema = table.getSchema(); + KuduPartitioner part = new KuduPartitioner.KuduPartitionerBuilder(table).build(); + + try { + PartialRow under = schema.newPartialRow(); + under.addInt("key", 999); + part.partitionRow(under); + fail("partitionRow did not throw a NonCoveredRangeException"); + } catch (NonCoveredRangeException ex) { + // Expected + } + + try { + PartialRow over = schema.newPartialRow(); + over.addInt("key", 999); + part.partitionRow(over); + fail("partitionRow did not throw a NonCoveredRangeException"); + } catch (NonCoveredRangeException ex) { + assertTrue(ex.getMessage().contains("does not exist in table")); + } + } + + @Test + public void testBuildTimeout() throws Exception { + Schema basicSchema = getBasicSchema(); + String tableName = "TestBuildTimeout"; + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.addHashPartitions(Collections.singletonList("key"), 3); + createOptions.setRangePartitionColumns(Collections.singletonList("key")); + KuduTable table = 
client.createTable(tableName, basicSchema, createOptions); + + // Ensure the table information can't be found to force a timeout. + harness.killAllMasterServers(); + + int timeoutMs = 2000; + long now = System.currentTimeMillis(); + try { + new KuduPartitioner.KuduPartitionerBuilder(table).buildTimeout(timeoutMs).build(); + fail("No NonRecoverableException was thrown"); + } catch (NonRecoverableException ex) { + assertTrue(ex.getMessage().startsWith("cannot complete before timeout")); + } + long elapsed = System.currentTimeMillis() - now; + long upperBound = timeoutMs * 2L; // Add 100% to avoid flakiness. + assertTrue(String.format("Elapsed time %d exceeded upper bound %d", elapsed, upperBound), + elapsed <= upperBound); + } + + @Test + public void testTableCache() throws Exception { + String tableName = "TestTableCache"; + KuduTable table = + client.createTable(tableName, getBasicSchema(), getBasicTableOptionsWithNonCoveredRange()); + + // Populate the table cache by building the partitioner once. + KuduPartitioner partitioner = new KuduPartitioner.KuduPartitionerBuilder(table).build(); + + // Ensure the remote table information can't be found. + harness.killAllMasterServers(); + + // This partitioner should build correctly because the table cache holds the partitions + // from the previous partitioner. + KuduPartitioner partitionerFromCache = + new KuduPartitioner.KuduPartitionerBuilder(table).build(); + + assertEquals(partitioner.numPartitions(), partitionerFromCache.numPartitions()); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPredicate.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPredicate.java new file mode 100644 index 0000000000..989c863b75 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPredicate.java @@ -0,0 +1,1328 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
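+ // Note: the tests below are pure unit tests of predicate merging and simplification;
+ // they need no mini cluster, only the RetryRule declared in the class.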
+ +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.EQUAL; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.GREATER; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.GREATER_EQUAL; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.LESS; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.LESS_EQUAL; +import static org.apache.kudu.client.KuduPredicate.PredicateType.RANGE; + +import java.math.BigDecimal; +import java.sql.Date; +import java.util.Arrays; +import java.util.List; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Type; +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.util.CharUtil; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.DecimalUtil; + +public class TestKuduPredicate { + + private static final ColumnSchema boolCol = + new ColumnSchema.ColumnSchemaBuilder("bool", Type.BOOL).build(); + + private static final ColumnSchema byteCol = + new ColumnSchema.ColumnSchemaBuilder("byte", Type.INT8).build(); + + private static final ColumnSchema shortCol = + new ColumnSchema.ColumnSchemaBuilder("short", Type.INT16).build(); + + private static final ColumnSchema intCol = + new ColumnSchema.ColumnSchemaBuilder("int", Type.INT32).build(); + + private static final ColumnSchema longCol = + new ColumnSchema.ColumnSchemaBuilder("long", Type.INT64).build(); + + private static final ColumnSchema floatCol = + new ColumnSchema.ColumnSchemaBuilder("float", Type.FLOAT).build(); + + private static final ColumnSchema doubleCol = + new ColumnSchema.ColumnSchemaBuilder("double", Type.DOUBLE).build(); + + private static final ColumnSchema stringCol = + new ColumnSchema.ColumnSchemaBuilder("string", Type.STRING).nullable(true).build(); + + private static final ColumnSchema binaryCol = + new ColumnSchema.ColumnSchemaBuilder("binary", Type.BINARY).build(); + + private static final ColumnSchema decimal32Col = + new ColumnSchema.ColumnSchemaBuilder("decimal32", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL32_PRECISION, 2)) + .build(); + + private static final ColumnSchema decimal64Col = + new ColumnSchema.ColumnSchemaBuilder("decimal64", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL64_PRECISION, 2)) + .build(); + + private static final ColumnSchema decimal128Col = + new ColumnSchema.ColumnSchemaBuilder("decimal128", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(DecimalUtil.MAX_DECIMAL128_PRECISION, 2)) + .build(); + + private static final ColumnSchema varcharCol = + new ColumnSchema.ColumnSchemaBuilder("varchar", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(10)) + .nullable(true) + .build(); + + private static final ColumnSchema dateCol = + new ColumnSchema.ColumnSchemaBuilder("date", Type.DATE) + .nullable(true) + .build(); + + @Rule + public RetryRule retryRule = new RetryRule(); + + private static KuduPredicate intRange(int lower, int upper) { + Preconditions.checkArgument(lower < upper); + return new KuduPredicate(RANGE, intCol, Bytes.fromInt(lower), Bytes.fromInt(upper)); + } + + private static KuduPredicate intInList(Integer... 
values) { + return KuduPredicate.newInListPredicate(intCol, Arrays.asList(values)); + } + + private static KuduPredicate boolInList(Boolean... values) { + return KuduPredicate.newInListPredicate(boolCol, Arrays.asList(values)); + } + + private static KuduPredicate stringInList(String... values) { + return KuduPredicate.newInListPredicate(stringCol, Arrays.asList(values)); + } + + private void testMerge(KuduPredicate a, + KuduPredicate b, + KuduPredicate expected) { + + Assert.assertEquals(expected, a.merge(b)); + Assert.assertEquals(expected, b.merge(a)); + } + + /** + * Tests merges on all types of integer predicates. + */ + @Test + public void testMergeInt() { + + // Equality + Equality + //-------------------- + + // | + // | + // = + // | + testMerge(KuduPredicate.newComparisonPredicate(intCol, EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 0)); + // | + // | + // = + // None + testMerge(KuduPredicate.newComparisonPredicate(intCol, EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 1), + KuduPredicate.none(intCol)); + + // Range + Equality + //-------------------- + + // [--------> + // | + // = + // | + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 10), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 10)); + + // [--------> + // | + // = + // None + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 10), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 0), + KuduPredicate.none(intCol)); + + // <--------) + // | + // = + // | + testMerge(KuduPredicate.newComparisonPredicate(intCol, LESS, 10), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 5), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 5)); + + // <--------) + // | + // = + // None + testMerge(KuduPredicate.newComparisonPredicate(intCol, LESS, 0), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 10), + KuduPredicate.none(intCol)); + + // Unbounded Range + Unbounded Range + //-------------------- + + // [--------> AND + // [--------> + // = + // [--------> + + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0)); + + // [--------> AND + // [-----> + // = + // [-----> + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5)); + + // <--------) AND + // <--------) + // = + // <--------) + + testMerge(KuduPredicate.newComparisonPredicate(intCol, LESS, 0), + KuduPredicate.newComparisonPredicate(intCol, LESS, 0), + KuduPredicate.newComparisonPredicate(intCol, LESS, 0)); + + // <--------) AND + // <----) + // = + // <----) + + testMerge(KuduPredicate.newComparisonPredicate(intCol, LESS, 0), + KuduPredicate.newComparisonPredicate(intCol, LESS, -10), + KuduPredicate.newComparisonPredicate(intCol, LESS, -10)); + + // [--------> AND + // <-------) + // = + // [----) + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, LESS, 10), + intRange(0, 10)); + + // [-----> AND + // <----) + // = + // | + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5), + KuduPredicate.newComparisonPredicate(intCol, LESS, 6), + 
KuduPredicate.newComparisonPredicate(intCol, EQUAL, 5)); + + // [-----> AND + // <---) + // = + // None + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5), + KuduPredicate.newComparisonPredicate(intCol, LESS, 5), + KuduPredicate.none(intCol)); + + // [-----> AND + // <---) + // = + // None + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5), + KuduPredicate.newComparisonPredicate(intCol, LESS, 3), + KuduPredicate.none(intCol)); + + // Range + Range + //-------------------- + + // [--------) AND + // [--------) + // = + // [--------) + + testMerge(intRange(0, 10), + intRange(0, 10), + intRange(0, 10)); + + // [--------) AND + // [----) + // = + // [----) + testMerge(intRange(0, 10), + intRange(0, 5), + intRange(0, 5)); + + // [--------) AND + // [----) + // = + // [----) + testMerge(intRange(0, 10), + intRange(3, 8), + intRange(3, 8)); + + // [-----) AND + // [------) + // = + // [---) + testMerge(intRange(0, 8), + intRange(3, 10), + intRange(3, 8)); + // [--) AND + // [---) + // = + // None + testMerge(intRange(0, 5), + intRange(5, 10), + KuduPredicate.none(intCol)); + + // [--) AND + // [---) + // = + // None + testMerge(intRange(0, 3), + intRange(5, 10), + KuduPredicate.none(intCol)); + + // Lower Bound + Range + //-------------------- + + // [------------> + // [---) + // = + // [---) + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0), + intRange(5, 10), + intRange(5, 10)); + + // [------------> + // [--------) + // = + // [--------) + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5), + intRange(5, 10), + intRange(5, 10)); + + // [------------> + // [--------) + // = + // [---) + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5), + intRange(0, 10), + intRange(5, 10)); + + // [-------> + // [-----) + // = + // None + testMerge(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 10), + intRange(0, 5), + KuduPredicate.none(intCol)); + + // Upper Bound + Range + //-------------------- + + // <------------) + // [---) + // = + // [---) + testMerge(KuduPredicate.newComparisonPredicate(intCol, LESS, 10), + intRange(3, 8), + intRange(3, 8)); + + // <------------) + // [--------) + // = + // [--------) + testMerge(KuduPredicate.newComparisonPredicate(intCol, LESS, 10), + intRange(5, 10), + intRange(5, 10)); + + + // <------------) + // [--------) + // = + // [----) + testMerge(KuduPredicate.newComparisonPredicate(intCol, LESS, 5), + intRange(0, 10), + intRange(0, 5)); + + // Range + Equality + //-------------------- + + // [---) AND + // | + // = + // None + testMerge(intRange(3, 5), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 1), + KuduPredicate.none(intCol)); + + // [---) AND + // | + // = + // | + testMerge(intRange(0, 5), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 0), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 0)); + + // [---) AND + // | + // = + // | + testMerge(intRange(0, 5), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 3), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 3)); + + // [---) AND + // | + // = + // None + testMerge(intRange(0, 5), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 5), + KuduPredicate.none(intCol)); + + // [---) AND + // | + // = + // None + testMerge(intRange(0, 5), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 7), + KuduPredicate.none(intCol)); + + // IN list + IN list + //-------------------- + + // | | | + // | | | + testMerge(intInList(0, 10, 20), + intInList(20, 10, 20, 
30), + intInList(10, 20)); + + // | | + // | | + testMerge(intInList(0, 20), + intInList(15, 30), + KuduPredicate.none(intCol)); + + // IN list + NOT NULL + //-------------------- + + testMerge(intInList(10), + KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 10)); + + testMerge(intInList(10, -100), + KuduPredicate.newIsNotNullPredicate(intCol), + intInList(-100, 10)); + + // IN list + Equality + //-------------------- + + // | | | + // | + // = + // | + testMerge(intInList(0, 10, 20), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 10), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 10)); + + // | | | + // | + // = + // none + testMerge(intInList(0, 10, 20), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 30), + KuduPredicate.none(intCol)); + + // IN list + Range + //-------------------- + + // | | | | | + // [---) + // = + // | | + testMerge(intInList(0, 10, 20, 30, 40), + intRange(10, 30), + intInList(10, 20)); + + // | | | | + // [--) + // = + // none + testMerge(intInList(0, 10, 20, 30), + intRange(25, 30), + KuduPredicate.none(intCol)); + + // | | | | + // [------> + // = + // | | + testMerge(intInList(0, 10, 20, 30), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 15), + intInList(20, 30)); + + // | | | + // [------> + // = + // | + testMerge(intInList(0, 10, 20), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 15), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 20)); + + // | | + // [------> + // = + // none + testMerge(intInList(0, 10), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 15), + KuduPredicate.none(intCol)); + + // | | | | + // <--) + // = + // | | + testMerge(intInList(0, 10, 20, 30), + KuduPredicate.newComparisonPredicate(intCol, LESS, 15), + intInList(0, 10)); + + // | | | + // <--) + // = + // | + testMerge(intInList(0, 10, 20), + KuduPredicate.newComparisonPredicate(intCol, LESS, 10), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 0)); + + // | | + // <--) + // = + // none + testMerge(intInList(10, 20), + KuduPredicate.newComparisonPredicate(intCol, LESS, 5), + KuduPredicate.none(intCol)); + + // None + //-------------------- + + // None AND + // [----> + // = + // None + testMerge(KuduPredicate.none(intCol), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0), + KuduPredicate.none(intCol)); + + // None AND + // <----) + // = + // None + testMerge(KuduPredicate.none(intCol), + KuduPredicate.newComparisonPredicate(intCol, LESS, 0), + KuduPredicate.none(intCol)); + + // None AND + // [----) + // = + // None + testMerge(KuduPredicate.none(intCol), + intRange(3, 7), + KuduPredicate.none(intCol)); + + // None AND + // | + // = + // None + testMerge(KuduPredicate.none(intCol), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 5), + KuduPredicate.none(intCol)); + + // None AND + // None + // = + // None + testMerge(KuduPredicate.none(intCol), + KuduPredicate.none(intCol), + KuduPredicate.none(intCol)); + + // IS NOT NULL + //-------------------- + + // IS NOT NULL AND + // NONE + // = + // NONE + testMerge(KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.none(intCol), + KuduPredicate.none(intCol)); + + // IS NOT NULL AND + // IS NULL + // = + // NONE + testMerge(KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.newIsNullPredicate(intCol), + KuduPredicate.none(intCol)); + + // IS NOT NULL AND + // IS NOT NULL + // = + // IS NOT NULL + testMerge(KuduPredicate.newIsNotNullPredicate(intCol), + 
KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.newIsNotNullPredicate(intCol)); + + // IS NOT NULL AND + // | + // = + // | + testMerge(KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 5), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 5)); + + // IS NOT NULL AND + // [-------> + // = + // [-------> + testMerge(KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 5)); + + // IS NOT NULL AND + // <---------) + // = + // <---------) + testMerge(KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.newComparisonPredicate(intCol, LESS, 5), + KuduPredicate.newComparisonPredicate(intCol, LESS, 5)); + + // IS NOT NULL AND + // [-------) + // = + // [-------) + testMerge(KuduPredicate.newIsNotNullPredicate(intCol), + intRange(0, 12), + intRange(0, 12)); + + + // IS NOT NULL AND + // | | | + // = + // | | | + testMerge(KuduPredicate.newIsNotNullPredicate(intCol), + intInList(0, 10, 20), + intInList(0, 10, 20)); + + // IS NULL + //-------------------- + + // IS NULL AND + // NONE + // = + // NONE + testMerge(KuduPredicate.newIsNullPredicate(intCol), + KuduPredicate.none(intCol), + KuduPredicate.none(intCol)); + + // IS NULL AND + // IS NULL + // = + // IS_NULL + testMerge(KuduPredicate.newIsNullPredicate(intCol), + KuduPredicate.newIsNullPredicate(intCol), + KuduPredicate.newIsNullPredicate(intCol)); + + // IS NULL AND + // IS NOT NULL + // = + // NONE + testMerge(KuduPredicate.newIsNullPredicate(intCol), + KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.none(intCol)); + + // IS NULL AND + // | + // = + // NONE + testMerge(KuduPredicate.newIsNullPredicate(intCol), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 5), + KuduPredicate.none(intCol)); + + // IS NULL AND + // [-------> + // = + // NONE + testMerge(KuduPredicate.newIsNullPredicate(intCol), + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 0), + KuduPredicate.none(intCol)); + + // IS NULL AND + // <---------) + // = + // NONE + testMerge(KuduPredicate.newIsNullPredicate(intCol), + KuduPredicate.newComparisonPredicate(intCol, LESS, 5), + KuduPredicate.none(intCol)); + + // IS NULL AND + // [-------) + // = + // NONE + testMerge(KuduPredicate.newIsNullPredicate(intCol), + intRange(0, 12), + KuduPredicate.none(intCol)); + + // IS NULL AND + // | | | + // = + // NONE + testMerge(KuduPredicate.newIsNullPredicate(intCol), + intInList(0, 10, 20), + KuduPredicate.none(intCol)); + } + + /** + * Tests tricky merges on a var length type. 
+ */ + @Test + public void testMergeString() { + + // [-----> + // <-----) + // = + // None + testMerge(KuduPredicate.newComparisonPredicate(stringCol, GREATER_EQUAL, "b\0"), + KuduPredicate.newComparisonPredicate(stringCol, LESS, "b"), + KuduPredicate.none(stringCol)); + + // [-----> + // <-----) + // = + // None + testMerge(KuduPredicate.newComparisonPredicate(stringCol, GREATER_EQUAL, "b"), + KuduPredicate.newComparisonPredicate(stringCol, LESS, "b"), + KuduPredicate.none(stringCol)); + + // [-----> + // <----) + // = + // | + testMerge(KuduPredicate.newComparisonPredicate(stringCol, GREATER_EQUAL, "b"), + KuduPredicate.newComparisonPredicate(stringCol, LESS, "b\0"), + KuduPredicate.newComparisonPredicate(stringCol, EQUAL, "b")); + + // [-----> + // <-----) + // = + // [--) + testMerge(KuduPredicate.newComparisonPredicate(stringCol, GREATER_EQUAL, "a"), + KuduPredicate.newComparisonPredicate(stringCol, LESS, "a\0\0"), + new KuduPredicate(RANGE, stringCol, + Bytes.fromString("a"), Bytes.fromString("a\0\0"))); + + // [-----> + // | | | | + // = + // | | | + testMerge(KuduPredicate.newComparisonPredicate(stringCol, GREATER_EQUAL, "a"), + stringInList("a", "c", "b", ""), + stringInList("a", "b", "c")); + + // IS NOT NULL + // | | | | + // = + // | | | | + testMerge(KuduPredicate.newIsNotNullPredicate(stringCol), + stringInList("a", "c", "b", ""), + stringInList("", "a", "b", "c")); + } + + @Test + public void testBoolean() { + + // b >= false + Assert.assertEquals(KuduPredicate.newIsNotNullPredicate(boolCol), + KuduPredicate.newComparisonPredicate(boolCol, GREATER_EQUAL, false)); + // b > false + Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, true), + KuduPredicate.newComparisonPredicate(boolCol, GREATER, false)); + // b = false + Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, false), + KuduPredicate.newComparisonPredicate(boolCol, EQUAL, false)); + // b < false + Assert.assertEquals(KuduPredicate.none(boolCol), + KuduPredicate.newComparisonPredicate(boolCol, LESS, false)); + // b <= false + Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, false), + KuduPredicate.newComparisonPredicate(boolCol, LESS_EQUAL, false)); + + // b >= true + Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, true), + KuduPredicate.newComparisonPredicate(boolCol, GREATER_EQUAL, true)); + // b > true + Assert.assertEquals(KuduPredicate.none(boolCol), + KuduPredicate.newComparisonPredicate(boolCol, GREATER, true)); + // b = true + Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, true), + KuduPredicate.newComparisonPredicate(boolCol, EQUAL, true)); + // b < true + Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, false), + KuduPredicate.newComparisonPredicate(boolCol, LESS, true)); + // b <= true + Assert.assertEquals(KuduPredicate.newIsNotNullPredicate(boolCol), + KuduPredicate.newComparisonPredicate(boolCol, LESS_EQUAL, true)); + + // b IN () + Assert.assertEquals(KuduPredicate.none(boolCol), boolInList()); + + // b IN (true) + Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, true), + boolInList(true, true, true)); + + // b IN (false) + Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, false), + boolInList(false)); + + // b IN (false, true) + Assert.assertEquals(KuduPredicate.newIsNotNullPredicate(boolCol), + boolInList(false, true, false, true)); + } + + /** + * Tests basic predicate merges across all types. 
+ */ + @Test + public void testAllTypesMerge() { + + testMerge(KuduPredicate.newComparisonPredicate(boolCol, GREATER_EQUAL, false), + KuduPredicate.newComparisonPredicate(boolCol, LESS, true), + new KuduPredicate(KuduPredicate.PredicateType.EQUALITY, + boolCol, + Bytes.fromBoolean(false), + null)); + + testMerge(KuduPredicate.newComparisonPredicate(boolCol, GREATER_EQUAL, false), + KuduPredicate.newComparisonPredicate(boolCol, LESS_EQUAL, true), + KuduPredicate.newIsNotNullPredicate(boolCol)); + + testMerge(KuduPredicate.newComparisonPredicate(byteCol, GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(byteCol, LESS, 10), + new KuduPredicate(RANGE, + byteCol, + new byte[] { (byte) 0 }, + new byte[] { (byte) 10 })); + + testMerge(KuduPredicate.newInListPredicate(byteCol, ImmutableList.of((byte) 12, (byte) 14, + (byte) 16, (byte) 18)), + KuduPredicate.newInListPredicate(byteCol, ImmutableList.of((byte) 14, (byte) 18, + (byte) 20)), + KuduPredicate.newInListPredicate(byteCol, ImmutableList.of((byte) 14, (byte) 18))); + + testMerge(KuduPredicate.newComparisonPredicate(shortCol, GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(shortCol, LESS, 10), + new KuduPredicate(RANGE, + shortCol, + Bytes.fromShort((short) 0), + Bytes.fromShort((short) 10))); + + testMerge(KuduPredicate.newInListPredicate(shortCol, ImmutableList.of((short) 12, (short) 14, + (short) 16, (short) 18)), + KuduPredicate.newInListPredicate(shortCol, ImmutableList.of((short) 14, (short) 18, + (short) 20)), + KuduPredicate.newInListPredicate(shortCol, ImmutableList.of((short) 14, (short) 18))); + + testMerge(KuduPredicate.newComparisonPredicate(longCol, GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(longCol, LESS, 10), + new KuduPredicate(RANGE, + longCol, + Bytes.fromLong(0), + Bytes.fromLong(10))); + + testMerge(KuduPredicate.newInListPredicate(longCol, ImmutableList.of(12L, 14L, 16L, 18L)), + KuduPredicate.newInListPredicate(longCol, ImmutableList.of(14L, 18L, 20L)), + KuduPredicate.newInListPredicate(longCol, ImmutableList.of(14L, 18L))); + + testMerge(KuduPredicate.newComparisonPredicate(floatCol, GREATER_EQUAL, 123.45f), + KuduPredicate.newComparisonPredicate(floatCol, LESS, 678.90f), + new KuduPredicate(RANGE, + floatCol, + Bytes.fromFloat(123.45f), + Bytes.fromFloat(678.90f))); + + testMerge(KuduPredicate.newInListPredicate(floatCol, ImmutableList.of(12f, 14f, 16f, 18f)), + KuduPredicate.newInListPredicate(floatCol, ImmutableList.of(14f, 18f, 20f)), + KuduPredicate.newInListPredicate(floatCol, ImmutableList.of(14f, 18f))); + + testMerge(KuduPredicate.newComparisonPredicate(doubleCol, GREATER_EQUAL, 123.45), + KuduPredicate.newComparisonPredicate(doubleCol, LESS, 678.90), + new KuduPredicate(RANGE, + doubleCol, + Bytes.fromDouble(123.45), + Bytes.fromDouble(678.90))); + + testMerge(KuduPredicate.newInListPredicate(doubleCol, ImmutableList.of(12d, 14d, 16d, 18d)), + KuduPredicate.newInListPredicate(doubleCol, ImmutableList.of(14d, 18d, 20d)), + KuduPredicate.newInListPredicate(doubleCol, ImmutableList.of(14d, 18d))); + + testMerge(KuduPredicate.newComparisonPredicate(decimal32Col, GREATER_EQUAL, + BigDecimal.valueOf(12345, 2)), + KuduPredicate.newComparisonPredicate(decimal32Col, LESS, + BigDecimal.valueOf(67890,2)), + new KuduPredicate(RANGE, + decimal32Col, + Bytes.fromBigDecimal(BigDecimal.valueOf(12345, 2), + DecimalUtil.MAX_DECIMAL32_PRECISION), + Bytes.fromBigDecimal(BigDecimal.valueOf(67890, 2), + DecimalUtil.MAX_DECIMAL32_PRECISION))); + + 
testMerge(KuduPredicate.newInListPredicate(decimal32Col, ImmutableList.of( + BigDecimal.valueOf(12345, 2), + BigDecimal.valueOf(45678, 2))), + KuduPredicate.newInListPredicate(decimal32Col, ImmutableList.of( + BigDecimal.valueOf(45678, 2), + BigDecimal.valueOf(98765, 2))), + KuduPredicate.newInListPredicate(decimal32Col, ImmutableList.of( + BigDecimal.valueOf(45678, 2)))); + + testMerge(KuduPredicate.newInListPredicate(decimal64Col, ImmutableList.of( + BigDecimal.valueOf(12345678910L, 2), + BigDecimal.valueOf(34567891011L, 2))), + KuduPredicate.newInListPredicate(decimal64Col, ImmutableList.of( + BigDecimal.valueOf(34567891011L, 2), + BigDecimal.valueOf(98765432111L, 2))), + KuduPredicate.newInListPredicate(decimal64Col, ImmutableList.of( + BigDecimal.valueOf(34567891011L, 2)))); + + testMerge(KuduPredicate.newComparisonPredicate(decimal64Col, GREATER_EQUAL, + BigDecimal.valueOf(12345678910L, 2)), + KuduPredicate.newComparisonPredicate(decimal64Col, LESS, + BigDecimal.valueOf(67890101112L,2)), + new KuduPredicate(RANGE, + decimal64Col, + Bytes.fromBigDecimal(BigDecimal.valueOf(12345678910L, 2), + DecimalUtil.MAX_DECIMAL64_PRECISION), + Bytes.fromBigDecimal(BigDecimal.valueOf(67890101112L, 2), + DecimalUtil.MAX_DECIMAL64_PRECISION))); + + testMerge(KuduPredicate.newInListPredicate(decimal128Col, ImmutableList.of( + new BigDecimal("1234567891011121314.15"), + new BigDecimal("3456789101112131415.16"))), + KuduPredicate.newInListPredicate(decimal128Col, ImmutableList.of( + new BigDecimal("3456789101112131415.16"), + new BigDecimal("9876543212345678910.11"))), + KuduPredicate.newInListPredicate(decimal128Col, ImmutableList.of( + new BigDecimal("3456789101112131415.16")))); + + testMerge(KuduPredicate.newComparisonPredicate(decimal128Col, GREATER_EQUAL, + new BigDecimal("1234567891011121314.15")), + KuduPredicate.newComparisonPredicate(decimal128Col, LESS, + new BigDecimal("67891011121314151617.18")), + new KuduPredicate(RANGE, + decimal128Col, + Bytes.fromBigDecimal(new BigDecimal("1234567891011121314.15"), + DecimalUtil.MAX_DECIMAL128_PRECISION), + Bytes.fromBigDecimal(new BigDecimal("67891011121314151617.18"), + DecimalUtil.MAX_DECIMAL128_PRECISION))); + + testMerge(KuduPredicate.newComparisonPredicate(binaryCol, GREATER_EQUAL, + new byte[] { 0, 1, 2, 3, 4, 5, 6 }), + KuduPredicate.newComparisonPredicate(binaryCol, LESS, new byte[] { 10 }), + new KuduPredicate(RANGE, + binaryCol, + new byte[] { 0, 1, 2, 3, 4, 5, 6 }, + new byte[] { 10 })); + + testMerge(KuduPredicate.newComparisonPredicate(varcharCol, GREATER_EQUAL, "bar"), + KuduPredicate.newComparisonPredicate(varcharCol, LESS, "foo"), + new KuduPredicate(RANGE, + varcharCol, + new byte[] {98, 97, 114}, + new byte[] {102, 111, 111})); + + byte[] valA = "a".getBytes(UTF_8); + byte[] valB = "b".getBytes(UTF_8); + byte[] valC = "c".getBytes(UTF_8); + byte[] valD = "d".getBytes(UTF_8); + byte[] valE = "e".getBytes(UTF_8); + testMerge(KuduPredicate.newInListPredicate(binaryCol, ImmutableList.of(valA, valB, valC, valD)), + KuduPredicate.newInListPredicate(binaryCol, ImmutableList.of(valB, valD, valE)), + KuduPredicate.newInListPredicate(binaryCol, ImmutableList.of(valB, valD))); + } + + @Test + public void testLessEqual() { + Assert.assertEquals(KuduPredicate.newComparisonPredicate(byteCol, LESS_EQUAL, 10), + KuduPredicate.newComparisonPredicate(byteCol, LESS, 11)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(shortCol, LESS_EQUAL, 10), + KuduPredicate.newComparisonPredicate(shortCol, LESS, 11)); + 
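KuduPredicate.newComparisonPredicate(shortCol, LESS, 11)); +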
Assert.assertEquals(KuduPredicate.newComparisonPredicate(intCol, LESS_EQUAL, 10), + KuduPredicate.newComparisonPredicate(intCol, LESS, 11)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(longCol, LESS_EQUAL, 10), + KuduPredicate.newComparisonPredicate(longCol, LESS, 11)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(floatCol, LESS_EQUAL, 12.345f), + KuduPredicate.newComparisonPredicate(floatCol, LESS, Math.nextAfter(12.345f, + Float.POSITIVE_INFINITY))); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(doubleCol, LESS_EQUAL, 12.345), + KuduPredicate.newComparisonPredicate(doubleCol, LESS, Math.nextAfter(12.345, + Float.POSITIVE_INFINITY))); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(decimal32Col, LESS_EQUAL, + BigDecimal.valueOf(12345,2)), + KuduPredicate.newComparisonPredicate(decimal32Col, LESS, + BigDecimal.valueOf(12346,2))); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(stringCol, LESS_EQUAL, "a"), + KuduPredicate.newComparisonPredicate(stringCol, LESS, "a\0")); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(binaryCol, LESS_EQUAL, new byte[] { (byte) 10 }), + KuduPredicate.newComparisonPredicate(binaryCol, LESS, new byte[] { (byte) 10, (byte) 0 })); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(varcharCol, LESS_EQUAL, "a"), + KuduPredicate.newComparisonPredicate(varcharCol, LESS, "a\0")); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(byteCol, LESS_EQUAL, Byte.MAX_VALUE), + KuduPredicate.newIsNotNullPredicate(byteCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(shortCol, LESS_EQUAL, Short.MAX_VALUE), + KuduPredicate.newIsNotNullPredicate(shortCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(intCol, LESS_EQUAL, Integer.MAX_VALUE), + KuduPredicate.newIsNotNullPredicate(intCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(longCol, LESS_EQUAL, Long.MAX_VALUE), + KuduPredicate.newIsNotNullPredicate(longCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(floatCol, LESS_EQUAL, Float.MAX_VALUE), + KuduPredicate.newComparisonPredicate(floatCol, LESS, Float.POSITIVE_INFINITY)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(floatCol, LESS_EQUAL, Float.POSITIVE_INFINITY), + KuduPredicate.newIsNotNullPredicate(floatCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(doubleCol, LESS_EQUAL, Double.MAX_VALUE), + KuduPredicate.newComparisonPredicate(doubleCol, LESS, Double.POSITIVE_INFINITY)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(doubleCol, LESS_EQUAL, Double.POSITIVE_INFINITY), + KuduPredicate.newIsNotNullPredicate(doubleCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(dateCol, LESS_EQUAL, + Date.valueOf("2020-06-01")), + KuduPredicate.newComparisonPredicate(dateCol, LESS, Date.valueOf("2020-06-02"))); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(dateCol, LESS_EQUAL, + DateUtil.epochDaysToSqlDate(DateUtil.MAX_DATE_VALUE)), + KuduPredicate.newIsNotNullPredicate(dateCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(dateCol, LESS_EQUAL, + DateUtil.MAX_DATE_VALUE), KuduPredicate.newIsNotNullPredicate(dateCol)); + } + + @Test + public void testGreater() { + Assert.assertEquals(KuduPredicate.newComparisonPredicate(byteCol, GREATER_EQUAL, 11), + KuduPredicate.newComparisonPredicate(byteCol, GREATER, 10)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(shortCol, GREATER_EQUAL, 11), + 
KuduPredicate.newComparisonPredicate(shortCol, GREATER, 10)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, 11), + KuduPredicate.newComparisonPredicate(intCol, GREATER, 10)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(longCol, GREATER_EQUAL, 11), + KuduPredicate.newComparisonPredicate(longCol, GREATER, 10)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(floatCol, GREATER_EQUAL, + Math.nextAfter(12.345f, Float.MAX_VALUE)), + KuduPredicate.newComparisonPredicate(floatCol, GREATER, 12.345f)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(doubleCol, GREATER_EQUAL, + Math.nextAfter(12.345, Float.MAX_VALUE)), + KuduPredicate.newComparisonPredicate(doubleCol, GREATER, 12.345)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(decimal32Col, GREATER_EQUAL, + BigDecimal.valueOf(12346, 2)), + KuduPredicate.newComparisonPredicate(decimal32Col, GREATER, + BigDecimal.valueOf(12345, 2))); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(stringCol, GREATER_EQUAL, "a\0"), + KuduPredicate.newComparisonPredicate(stringCol, GREATER, "a")); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(binaryCol, GREATER_EQUAL, + new byte[] { (byte) 10, (byte) 0 }), + KuduPredicate.newComparisonPredicate(binaryCol, GREATER, new byte[] { (byte) 10 })); + + Assert.assertEquals(KuduPredicate.none(byteCol), + KuduPredicate.newComparisonPredicate(byteCol, GREATER, Byte.MAX_VALUE)); + Assert.assertEquals(KuduPredicate.none(shortCol), + KuduPredicate.newComparisonPredicate(shortCol, GREATER, Short.MAX_VALUE)); + Assert.assertEquals(KuduPredicate.none(intCol), + KuduPredicate.newComparisonPredicate(intCol, GREATER, Integer.MAX_VALUE)); + Assert.assertEquals(KuduPredicate.none(longCol), + KuduPredicate.newComparisonPredicate(longCol, GREATER, Long.MAX_VALUE)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(floatCol, GREATER_EQUAL, Float.POSITIVE_INFINITY), + KuduPredicate.newComparisonPredicate(floatCol, GREATER, Float.MAX_VALUE)); + Assert.assertEquals( + KuduPredicate.none(floatCol), + KuduPredicate.newComparisonPredicate(floatCol, GREATER, Float.POSITIVE_INFINITY)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(doubleCol, GREATER_EQUAL, Double.POSITIVE_INFINITY), + KuduPredicate.newComparisonPredicate(doubleCol, GREATER, Double.MAX_VALUE)); + Assert.assertEquals( + KuduPredicate.none(doubleCol), + KuduPredicate.newComparisonPredicate(doubleCol, GREATER, Double.POSITIVE_INFINITY)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(dateCol, GREATER_EQUAL, + Date.valueOf("2020-06-15")), + KuduPredicate.newComparisonPredicate(dateCol, GREATER, Date.valueOf("2020-06-14"))); + } + + @Test + public void testLess() { + Assert.assertEquals(KuduPredicate.newComparisonPredicate(byteCol, LESS, Byte.MIN_VALUE), + KuduPredicate.none(byteCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(shortCol, LESS, Short.MIN_VALUE), + KuduPredicate.none(shortCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(intCol, LESS, Integer.MIN_VALUE), + KuduPredicate.none(intCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(longCol, LESS, Long.MIN_VALUE), + KuduPredicate.none(longCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(floatCol, LESS, Float.NEGATIVE_INFINITY), + KuduPredicate.none(floatCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(doubleCol, LESS, Double.NEGATIVE_INFINITY), + KuduPredicate.none(doubleCol)); + 
Assert.assertEquals(KuduPredicate.newComparisonPredicate(decimal32Col, LESS, + DecimalUtil.minValue(DecimalUtil.MAX_DECIMAL32_PRECISION, 2)), + KuduPredicate.none(decimal32Col)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(decimal64Col, LESS, + DecimalUtil.minValue(DecimalUtil.MAX_DECIMAL64_PRECISION, 2)), + KuduPredicate.none(decimal64Col)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(decimal128Col, LESS, + DecimalUtil.minValue(DecimalUtil.MAX_DECIMAL128_PRECISION, 2)), + KuduPredicate.none(decimal128Col)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(stringCol, LESS, ""), + KuduPredicate.none(stringCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(binaryCol, LESS, new byte[] {}), + KuduPredicate.none(binaryCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(varcharCol, LESS, ""), + KuduPredicate.none(varcharCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(dateCol, LESS, + DateUtil.epochDaysToSqlDate(DateUtil.MIN_DATE_VALUE)), KuduPredicate.none(dateCol)); + } + + @Test + public void testGreaterEqual() { + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(byteCol, GREATER_EQUAL, Byte.MIN_VALUE), + KuduPredicate.newIsNotNullPredicate(byteCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(shortCol, GREATER_EQUAL, Short.MIN_VALUE), + KuduPredicate.newIsNotNullPredicate(shortCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, Integer.MIN_VALUE), + KuduPredicate.newIsNotNullPredicate(intCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(longCol, GREATER_EQUAL, Long.MIN_VALUE), + KuduPredicate.newIsNotNullPredicate(longCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(floatCol, GREATER_EQUAL, Float.NEGATIVE_INFINITY), + KuduPredicate.newIsNotNullPredicate(floatCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(doubleCol, GREATER_EQUAL, Double.NEGATIVE_INFINITY), + KuduPredicate.newIsNotNullPredicate(doubleCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(decimal32Col, GREATER_EQUAL, + DecimalUtil.minValue(DecimalUtil.MAX_DECIMAL32_PRECISION, 2)), + KuduPredicate.newIsNotNullPredicate(decimal32Col)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(decimal64Col, GREATER_EQUAL, + DecimalUtil.minValue(DecimalUtil.MAX_DECIMAL64_PRECISION, 2)), + KuduPredicate.newIsNotNullPredicate(decimal64Col)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(decimal128Col, GREATER_EQUAL, + DecimalUtil.minValue(DecimalUtil.MAX_DECIMAL128_PRECISION, 2)), + KuduPredicate.newIsNotNullPredicate(decimal128Col)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(stringCol, GREATER_EQUAL, ""), + KuduPredicate.newIsNotNullPredicate(stringCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(binaryCol, GREATER_EQUAL, new byte[] {}), + KuduPredicate.newIsNotNullPredicate(binaryCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(varcharCol, GREATER_EQUAL, ""), + KuduPredicate.newIsNotNullPredicate(varcharCol)); + + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(byteCol, GREATER_EQUAL, Byte.MAX_VALUE), + KuduPredicate.newComparisonPredicate(byteCol, EQUAL, Byte.MAX_VALUE)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(shortCol, GREATER_EQUAL, Short.MAX_VALUE), + KuduPredicate.newComparisonPredicate(shortCol, EQUAL, Short.MAX_VALUE)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(intCol, 
GREATER_EQUAL, Integer.MAX_VALUE), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, Integer.MAX_VALUE)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(longCol, GREATER_EQUAL, Long.MAX_VALUE), + KuduPredicate.newComparisonPredicate(longCol, EQUAL, Long.MAX_VALUE)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(floatCol, GREATER_EQUAL, Float.POSITIVE_INFINITY), + KuduPredicate.newComparisonPredicate(floatCol, EQUAL, Float.POSITIVE_INFINITY)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(doubleCol, GREATER_EQUAL, Double.POSITIVE_INFINITY), + KuduPredicate.newComparisonPredicate(doubleCol, EQUAL, Double.POSITIVE_INFINITY)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(dateCol, GREATER_EQUAL, + DateUtil.epochDaysToSqlDate(DateUtil.MIN_DATE_VALUE)), + KuduPredicate.newIsNotNullPredicate(dateCol)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(dateCol, GREATER_EQUAL, DateUtil.MIN_DATE_VALUE), + KuduPredicate.newIsNotNullPredicate(dateCol)); + } + + @Test + public void testCreateWithObject() { + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(byteCol, EQUAL, (Object) (byte) 10), + KuduPredicate.newComparisonPredicate(byteCol, EQUAL, (byte) 10)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(shortCol, EQUAL, (Object) (short) 10), + KuduPredicate.newComparisonPredicate(shortCol, EQUAL, 10)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(intCol, EQUAL, (Object) 10), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, 10)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(longCol, EQUAL, (Object) 10L), + KuduPredicate.newComparisonPredicate(longCol, EQUAL, 10L)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(floatCol, EQUAL, (Object) 12.345f), + KuduPredicate.newComparisonPredicate(floatCol, EQUAL, 12.345f)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(doubleCol, EQUAL, (Object) 12.345), + KuduPredicate.newComparisonPredicate(doubleCol, EQUAL, 12.345)); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(decimal32Col, EQUAL, + (Object) BigDecimal.valueOf(12345,2)), + KuduPredicate.newComparisonPredicate(decimal32Col, EQUAL, + BigDecimal.valueOf(12345,2))); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(stringCol, EQUAL, (Object) "a"), + KuduPredicate.newComparisonPredicate(stringCol, EQUAL, "a")); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(binaryCol, EQUAL, (Object) new byte[] { (byte) 10 }), + KuduPredicate.newComparisonPredicate(binaryCol, EQUAL, new byte[] { (byte) 10 })); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(varcharCol, EQUAL, (Object) "a"), + KuduPredicate.newComparisonPredicate(varcharCol, EQUAL, "a")); + Assert.assertEquals(KuduPredicate + .newComparisonPredicate(dateCol, EQUAL, (Object) Date.valueOf("2020-06-15")), + KuduPredicate + .newComparisonPredicate(dateCol, EQUAL, Date.valueOf("2020-06-15"))); + } + + @Test + public void testToString() { + Assert.assertEquals("`bool` = true", + KuduPredicate.newComparisonPredicate(boolCol, EQUAL, true).toString()); + Assert.assertEquals("`byte` = 11", + KuduPredicate.newComparisonPredicate(byteCol, EQUAL, 11).toString()); + Assert.assertEquals("`short` = 11", + KuduPredicate.newComparisonPredicate(shortCol, EQUAL, 11).toString()); + Assert.assertEquals("`int` = -123", + KuduPredicate.newComparisonPredicate(intCol, EQUAL, -123).toString()); + Assert.assertEquals("`long` = 5454", + 
KuduPredicate.newComparisonPredicate(longCol, EQUAL, 5454).toString()); + Assert.assertEquals("`float` = 123.456", + KuduPredicate.newComparisonPredicate(floatCol, EQUAL, 123.456f).toString()); + Assert.assertEquals("`double` = 123.456", + KuduPredicate.newComparisonPredicate(doubleCol, EQUAL, 123.456).toString()); + Assert.assertEquals("`decimal32` = 123.45", + KuduPredicate.newComparisonPredicate(decimal32Col, EQUAL, + BigDecimal.valueOf(12345, 2)).toString()); + Assert.assertEquals("`decimal64` = 123456789.10", + KuduPredicate.newComparisonPredicate(decimal64Col, EQUAL, + BigDecimal.valueOf(12345678910L, 2)).toString()); + Assert.assertEquals("`decimal128` = 1234567891011121314.15", + KuduPredicate.newComparisonPredicate(decimal128Col, EQUAL, + new BigDecimal("1234567891011121314.15")).toString()); + Assert.assertEquals("`string` = \"my string\"", + KuduPredicate.newComparisonPredicate(stringCol, EQUAL, "my string").toString()); + Assert.assertEquals("`binary` = 0xAB01CD", KuduPredicate.newComparisonPredicate( + binaryCol, EQUAL, new byte[] { (byte) 0xAB, (byte) 0x01, (byte) 0xCD }).toString()); + Assert.assertEquals("`int` IN (-10, 0, 10)", + intInList(10, 0, -10).toString()); + Assert.assertEquals("`string` IS NOT NULL", + KuduPredicate.newIsNotNullPredicate(stringCol).toString()); + Assert.assertEquals("`string` IS NULL", + KuduPredicate.newIsNullPredicate(stringCol).toString()); + Assert.assertEquals("`varchar` = \"my varchar\"", + KuduPredicate.newComparisonPredicate(varcharCol, EQUAL, "my varchar").toString()); + Assert.assertEquals("`varchar` IS NOT NULL", + KuduPredicate.newIsNotNullPredicate(varcharCol).toString()); + Assert.assertEquals("`varchar` IS NULL", + KuduPredicate.newIsNullPredicate(varcharCol).toString()); + // IS NULL predicate on non-nullable column = NONE predicate + Assert.assertEquals("`int` NONE", + KuduPredicate.newIsNullPredicate(intCol).toString()); + + Assert.assertEquals("`bool` = true", KuduPredicate.newInListPredicate( + boolCol, ImmutableList.of(true)).toString()); + Assert.assertEquals("`bool` = false", KuduPredicate.newInListPredicate( + boolCol, ImmutableList.of(false)).toString()); + Assert.assertEquals("`bool` IS NOT NULL", KuduPredicate.newInListPredicate( + boolCol, ImmutableList.of(false, true, true)).toString()); + Assert.assertEquals("`byte` IN (1, 10, 100)", KuduPredicate.newInListPredicate( + byteCol, ImmutableList.of((byte) 1, (byte) 10, (byte) 100)).toString()); + Assert.assertEquals("`short` IN (1, 10, 100)", KuduPredicate.newInListPredicate( + shortCol, ImmutableList.of((short) 1, (short) 100, (short) 10)).toString()); + Assert.assertEquals("`int` IN (1, 10, 100)", KuduPredicate.newInListPredicate( + intCol, ImmutableList.of(1, 100, 10)).toString()); + Assert.assertEquals("`long` IN (1, 10, 100)", KuduPredicate.newInListPredicate( + longCol, ImmutableList.of(1L, 100L, 10L)).toString()); + Assert.assertEquals("`float` IN (78.9, 123.456)", KuduPredicate.newInListPredicate( + floatCol, ImmutableList.of(123.456f, 78.9f)).toString()); + Assert.assertEquals("`double` IN (78.9, 123.456)", KuduPredicate.newInListPredicate( + doubleCol, ImmutableList.of(123.456d, 78.9d)).toString()); + Assert.assertEquals("`string` IN (\"a\", \"my string\")", + KuduPredicate.newInListPredicate(stringCol, ImmutableList.of("my string", "a")).toString()); + Assert.assertEquals("`binary` IN (0x00, 0xAB01CD)", KuduPredicate.newInListPredicate( + binaryCol, ImmutableList.of(new byte[] { (byte) 0xAB, (byte) 0x01, (byte) 0xCD }, + new byte[] { (byte) 0x00 
})).toString()); + Assert.assertEquals("`date` IS NULL", KuduPredicate.newIsNullPredicate(dateCol).toString()); + Assert.assertEquals("`date` IS NOT NULL", + KuduPredicate.newIsNotNullPredicate(dateCol).toString()); + + Assert.assertEquals("`date` = 2020-06-16", + KuduPredicate.newComparisonPredicate(dateCol, EQUAL, Date.valueOf("2020-06-16")) + .toString()); + List intDates = ImmutableList + .of(DateUtil.sqlDateToEpochDays(Date.valueOf("2020-06-16")), + DateUtil.sqlDateToEpochDays(Date.valueOf("2019-01-01")), + DateUtil.sqlDateToEpochDays(Date.valueOf("2020-11-10"))); + List sqlDates = ImmutableList + .of(Date.valueOf("2020-06-16"), Date.valueOf("2019-01-01"), Date.valueOf("2020-11-10")); + Assert.assertEquals("`date` IN (2019-01-01, 2020-06-16, 2020-11-10)", + KuduPredicate.newInListPredicate(dateCol, intDates).toString()); + Assert.assertEquals("`date` IN (2019-01-01, 2020-06-16, 2020-11-10)", + KuduPredicate.newInListPredicate(dateCol, sqlDates).toString()); + } + + @Test + public void testDecimalCoercion() { + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(decimal32Col, LESS, BigDecimal.valueOf(123)), + KuduPredicate.newComparisonPredicate(decimal32Col, LESS, BigDecimal.valueOf(12300, 2)) + ); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(decimal32Col, GREATER, BigDecimal.valueOf(123, 1)), + KuduPredicate.newComparisonPredicate(decimal32Col, GREATER, BigDecimal.valueOf(1230, 2)) + ); + Assert.assertEquals( + KuduPredicate.newComparisonPredicate(decimal32Col, EQUAL, BigDecimal.valueOf(1, 0)), + KuduPredicate.newComparisonPredicate(decimal32Col, EQUAL, BigDecimal.valueOf(100, 2)) + ); + } + +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduScanner.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduScanner.java new file mode 100644 index 0000000000..9b4e866595 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduScanner.java @@ -0,0 +1,692 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.kudu.client.AsyncKuduScanner.DEFAULT_IS_DELETED_COL_NAME; +import static org.apache.kudu.test.ClientTestUtil.createManyStringsSchema; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.ClientTestUtil.loadDefaultTable; +import static org.apache.kudu.test.junit.AssertHelpers.assertEventuallyTrue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.Closeable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Common.DataType; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.Operation.ChangeType; +import org.apache.kudu.test.CapturingLogAppender; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.RandomUtils; +import org.apache.kudu.test.cluster.KuduBinaryLocator; +import org.apache.kudu.test.junit.AssertHelpers; +import org.apache.kudu.util.DataGenerator; +import org.apache.kudu.util.Pair; + + +public class TestKuduScanner { + private static final Logger LOG = LoggerFactory.getLogger(TestScannerMultiTablet.class); + + private static final String tableName = "TestKuduScanner"; + + private static final int DIFF_FLUSH_SEC = 1; + + private KuduClient client; + private Random random; + private DataGenerator generator; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + random = RandomUtils.getRandom(); + generator = new DataGenerator.DataGeneratorBuilder() + .random(random) + .build(); + } + + /** + * Test that scans get retried at other tablet servers when they're quiescing. + */ + @Test(timeout = 100000) + public void testScanQuiescingTabletServer() throws Exception { + int rowCount = 500; + Schema tableSchema = new Schema(Collections.singletonList( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build() + )); + + // Create a table with some rows in it. For simplicity, use a + // single-partition table with replicas on each server (we're required + // to set some partitioning though). + CreateTableOptions tableOptions = new CreateTableOptions() + .setRangePartitionColumns(Collections.singletonList("key")) + .setNumReplicas(3); + KuduTable table = client.createTable(tableName, tableSchema, tableOptions); + KuduSession session = client.newSession(); + for (int i = 0; i < rowCount; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt(0, i); + session.apply(insert); + } + + // Quiesce a single tablet server. 
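+ // A quiescing tablet server stops accepting new scan requests and tries to + // give up its tablet leaderships, so a scan that lands on it should be + // retried against one of the other replicas.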
+ List tservers = harness.getTabletServers(); + KuduBinaryLocator.ExecutableInfo exeInfo = KuduBinaryLocator.findBinary("kudu"); + List commandLine = Lists.newArrayList(exeInfo.exePath(), + "tserver", + "quiesce", + "start", + tservers.get(0).toString()); + ProcessBuilder processBuilder = new ProcessBuilder(commandLine); + processBuilder.environment().putAll(exeInfo.environment()); + Process quiesceTserver = processBuilder.start(); + assertEquals(0, quiesceTserver.waitFor()); + + // Now start a scan. Even if the scan goes to the quiescing server, the + // scan request should eventually be routed to a non-quiescing server + // and complete. We aren't guaranteed to hit the quiescing server, but this + // test would frequently fail if we didn't handle quiescing servers properly. + KuduScanner scanner = client.newScannerBuilder(table).build(); + KuduScannerIterator iterator = scanner.iterator(); + assertTrue(iterator.hasNext()); + while (iterator.hasNext()) { + iterator.next(); + } + } + + @Test(timeout = 100000) + public void testIterable() throws Exception { + KuduTable table = client.createTable(tableName, getBasicSchema(), getBasicCreateTableOptions()); + DataGenerator generator = new DataGenerator.DataGeneratorBuilder() + .random(RandomUtils.getRandom()) + .build(); + KuduSession session = client.newSession(); + List insertKeys = new ArrayList<>(); + int numRows = 10; + for (int i = 0; i < numRows; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + generator.randomizeRow(row); + insertKeys.add(row.getInt(0)); + session.apply(insert); + } + + // Ensure that when an enhanced for-loop is used, there's no sharing of RowResult objects. + KuduScanner scanner = client.newScannerBuilder(table).build(); + Set results = new HashSet<>(); + Set resultKeys = new HashSet<>(); + for (RowResult rowResult : scanner) { + results.add(rowResult); + resultKeys.add(rowResult.getInt(0)); + } + assertEquals(numRows, results.size()); + assertTrue(resultKeys.containsAll(insertKeys)); + + // Ensure that when the reuseRowResult optimization is set, only a single RowResult is used. + KuduScanner reuseScanner = client.newScannerBuilder(table).build(); + reuseScanner.setReuseRowResult(true); + Set reuseResult = new HashSet<>(); + for (RowResult rowResult : reuseScanner) { + reuseResult.add(rowResult); + } + // Ensure the same RowResult object is reused. + assertEquals(1, reuseResult.size()); + } + + @Test(timeout = 100000) + @KuduTestHarness.TabletServerConfig(flags = { + "--scanner_ttl_ms=5000", + "--scanner_gc_check_interval_us=500000"}) // 10% of the TTL. + public void testKeepAlive() throws Exception { + int rowCount = 500; + long shortScannerTtlMs = 5000; + + // Create a simple table with a single partition. + Schema tableSchema = new Schema(Collections.singletonList( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build() + )); + + CreateTableOptions tableOptions = new CreateTableOptions() + .setRangePartitionColumns(Collections.singletonList("key")) + .setNumReplicas(1); + KuduTable table = client.createTable(tableName, tableSchema, tableOptions); + + KuduSession session = client.newSession(); + for (int i = 0; i < rowCount; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt(0, i); + session.apply(insert); + } + + // Test that a keepAlivePeriodMs less than the scanner ttl is successful. 
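+ // The scanner iterator sends a keep-alive request every keepAlivePeriodMs, and + // each keep-alive resets the scanner's TTL on the tablet server, so a period + // shorter than --scanner_ttl_ms keeps a slow scan alive.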
+ KuduScanner goodScanner = client.newScannerBuilder(table) + .batchSizeBytes(100) // Set a small batch size so the first scan doesn't read all the rows. + .keepAlivePeriodMs(shortScannerTtlMs / 4) + .build(); + processKeepAliveScanner(goodScanner, shortScannerTtlMs); + + // Test that a keepAlivePeriodMs greater than the scanner ttl fails. + KuduScanner badScanner = client.newScannerBuilder(table) + .batchSizeBytes(100) // Set a small batch size so the first scan doesn't read all the rows. + .keepAlivePeriodMs(shortScannerTtlMs * 2L) + .build(); + try { + processKeepAliveScanner(badScanner, shortScannerTtlMs); + fail("Should throw a scanner not found exception"); + } catch (RuntimeException ex) { + assertTrue(ex.getMessage().matches(".*Scanner .* not found.*")); + } + } + + private void processKeepAliveScanner(KuduScanner scanner, long scannerTtlMs) throws Exception { + int i = 0; + KuduScannerIterator iterator = scanner.iterator(); + // Ensure reading takes longer than the scanner ttl. + while (iterator.hasNext()) { + iterator.next(); + // Sleep for half the ttl for the first few rows. This ensures + // we are on the same tablet and will go past the ttl without + // a new scan request. It also ensures a single row doesn't go + // longer than the ttl. + if (i < 5) { + Thread.sleep(scannerTtlMs / 2); // Sleep for half the ttl. + i++; + } + } + } + + @Test(timeout = 100000) + public void testScanWithQueryId() throws Exception { + KuduTable table = client.createTable(tableName, getBasicSchema(), getBasicCreateTableOptions()); + DataGenerator generator = new DataGenerator.DataGeneratorBuilder() + .random(RandomUtils.getRandom()) + .build(); + KuduSession session = client.newSession(); + int numRows = 10; + for (int i = 0; i < numRows; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + generator.randomizeRow(row); + session.apply(insert); + } + // Scan with specified query id. + { + int rowsScanned = 0; + KuduScanner scanner = client.newScannerBuilder(table) + .batchSizeBytes(100) + .setQueryId("request-id-for-test") + .build(); + while (scanner.hasMoreRows()) { + rowsScanned += scanner.nextRows().getNumRows(); + } + assertEquals(numRows, rowsScanned); + } + // Scan with default query id. + { + int rowsScanned = 0; + KuduScanner scanner = client.newScannerBuilder(table) + .batchSizeBytes(100) + .build(); + while (scanner.hasMoreRows()) { + rowsScanned += scanner.nextRows().getNumRows(); + } + assertEquals(numRows, rowsScanned); + } + } + + @Test(timeout = 100000) + public void testOpenScanWithDroppedPartition() throws Exception { + // Create a table with 2 range partitions. + final Schema basicSchema = getBasicSchema(); + final String tableName = "testOpenScanWithDroppedPartition"; + PartialRow bottom = basicSchema.newPartialRow(); + bottom.addInt("key", 0); + PartialRow middle = basicSchema.newPartialRow(); + middle.addInt("key", 1000); + PartialRow top = basicSchema.newPartialRow(); + top.addInt("key", 2000); + + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(Collections.singletonList("key")); + createOptions.addRangePartition(bottom, middle); + createOptions.addRangePartition(middle, top); + KuduTable table = client.createTable(tableName, basicSchema, createOptions); + + // Load rows into both partitions. + int numRows = 1999; + loadDefaultTable(client, tableName, numRows); + + // Scan the rows while dropping a partition. 
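+ // The first range partition is dropped after the first batch has been read; the + // already open scanner is still expected to return every row, as asserted below.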
+ KuduScanner scanner = client.newScannerBuilder(table) + .batchSizeBytes(100) // Set a small batch size so the first scan doesn't read all the rows. + .build(); + + int rowsScanned = 0; + int batchNum = 0; + while (scanner.hasMoreRows()) { + if (batchNum == 1) { + CapturingLogAppender capture = new CapturingLogAppender(); + // Drop the partition. + try (Closeable unused = capture.attach()) { + client.alterTable(tableName, + new AlterTableOptions().dropRangePartition(bottom, middle)); + // Give time for the background drop operations. + Thread.sleep(1000); + } + // Verify the partition was dropped. + KuduPartitioner partitioner = + new KuduPartitioner.KuduPartitionerBuilder(table).build(); + assertEquals("The partition was not dropped", 1, partitioner.numPartitions()); + assertTrue(capture.getAppendedText().contains("Deleting tablet data")); + assertTrue(capture.getAppendedText().contains("successfully deleted")); + } + rowsScanned += scanner.nextRows().getNumRows(); + batchNum++; + } + + assertTrue("All messages were consumed in the first batch", batchNum > 1); + assertEquals("Some message were not consumed", numRows, rowsScanned); + } + + @Test(timeout = 100000) + @KuduTestHarness.TabletServerConfig(flags = { "--flush_threshold_secs=" + DIFF_FLUSH_SEC }) + public void testDiffScan() throws Exception { + Schema schema = new Schema(Arrays.asList( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + // Include a column with the default IS_DELETED column name to test collision handling. + new ColumnSchema.ColumnSchemaBuilder(DEFAULT_IS_DELETED_COL_NAME, Type.INT32).build() + )); + + KuduTable table = client.createTable(tableName, schema, getBasicCreateTableOptions()); + + // Generate some rows before the start time. Ensure there's at least one insert. + int beforeBounds = 5; + int numInserts = RandomUtils.nextIntInRange(random, 1, beforeBounds); + int numUpdates = random.nextInt(beforeBounds); + int numDeletes = random.nextInt(beforeBounds); + List beforeOps = + generateMutationOperations(table, numInserts, numUpdates, numDeletes); + Map before = applyOperations(beforeOps); + LOG.info("Before: {}", before); + + // Set the start timestamp after the initial mutations by getting the propagated timestamp, + // and incrementing by 1. + long startHT = client.getLastPropagatedTimestamp() + 1; + LOG.info("startHT: {}", startHT); + + // Generate row mutations. + // The mutations performed here are what should be seen by the diff scan. + int mutationBounds = 10; + int expectedNumInserts = random.nextInt(mutationBounds); + int expectedNumUpdates = random.nextInt(mutationBounds); + int expectedNumDeletes = random.nextInt(mutationBounds); + List operations = generateMutationOperations(table, expectedNumInserts, + expectedNumUpdates, expectedNumDeletes); + Map mutations = applyOperations(operations); + LOG.info("Mutations: {}", mutations); + + // Set the end timestamp after the test mutations by getting the propagated timestamp, + // and incrementing by 1. + long endHT = client.getLastPropagatedTimestamp() + 1; + LOG.info("endHT: {}", endHT); + + // Generate some rows after the end time. + int afterBounds = 5; + numInserts = random.nextInt(afterBounds); + numUpdates = random.nextInt(afterBounds); + numDeletes = random.nextInt(afterBounds); + List afterOps = + generateMutationOperations(table, numInserts, numUpdates, numDeletes); + Map after = applyOperations(afterOps); + LOG.info("After: {}", after); + + // Diff scan the time range. 
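+ // A diff scan returns the rows that were inserted, updated, or deleted between the + // two snapshot timestamps, with a virtual IS_DELETED column appended to the + // projection; the assertions below check that column's placement and type.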
+ // Pass through the scan token API to ensure serialization of tokens works too. + List tokens = client.newScanTokenBuilder(table) + .diffScan(startHT, endHT) + .build(); + List results = new ArrayList<>(); + for (KuduScanToken token : tokens) { + KuduScanner scanner = KuduScanToken.deserializeIntoScanner(token.serialize(), client); + + // Verify the IS_DELETED column is appended at the end of the projection. + Schema projection = scanner.getProjectionSchema(); + int isDeletedIndex = projection.getIsDeletedIndex(); + assertEquals(projection.getColumnCount() - 1, isDeletedIndex); + // Verify the IS_DELETED column has the correct types. + ColumnSchema isDeletedCol = projection.getColumnByIndex(isDeletedIndex); + assertEquals(Type.BOOL, isDeletedCol.getType()); + assertEquals(DataType.IS_DELETED, isDeletedCol.getWireType()); + // Verify the IS_DELETED column is named to avoid collision. + assertEquals(projection.getColumnByIndex(isDeletedIndex), + projection.getColumn(DEFAULT_IS_DELETED_COL_NAME + "_")); + + for (RowResult row : scanner) { + results.add(row); + } + } + + // DELETEs won't be found in the results because the rows to which they + // apply were also inserted within the diff scan's time range, which means + // they will be excluded from the scan results. + assertEquals(mutations.size() - expectedNumDeletes, results.size()); + + // Count the results and verify their change type. + int resultNumInserts = 0; + int resultNumUpdates = 0; + int resultExtra = 0; + for (RowResult result : results) { + Integer key = result.getInt(0); + LOG.info("Processing key {}", key); + ChangeType type = mutations.get(key); + if (type == ChangeType.INSERT) { + assertFalse(result.isDeleted()); + resultNumInserts++; + } else if (type == ChangeType.UPDATE) { + assertFalse(result.isDeleted()); + resultNumUpdates++; + } else if (type == ChangeType.DELETE) { + fail("Shouldn't see any DELETEs"); + } else { + // The key was not found in the mutations map. This means that we somehow managed to scan + // a row that was never mutated. It's an error and will trigger an assert below. + assertNull(type); + resultExtra++; + } + } + assertEquals(expectedNumInserts, resultNumInserts); + assertEquals(expectedNumUpdates, resultNumUpdates); + assertEquals(0, resultExtra); + } + + /** + * Applies a list of Operations and returns the final ChangeType for each key. + * @param operations the operations to apply. + * @return a map of each key and its final ChangeType. + */ + private Map applyOperations(List operations) throws Exception { + Map results = new HashMap<>(); + // If there are no operations, return early. + if (operations.isEmpty()) { + return results; + } + KuduSession session = client.newSession(); + // On some runs, wait long enough to flush at the start. + if (random.nextBoolean()) { + LOG.info("Waiting for a flush at the start of applyOperations"); + Thread.sleep(DIFF_FLUSH_SEC + 1); + } + + // Pick an int as a flush indicator so we flush once on average while applying operations. + int flushInt = random.nextInt(operations.size()); + for (Operation op : operations) { + // On some runs, wait long enough to flush while applying operations. 
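+ // The tablet servers run with a low --flush_threshold_secs (DIFF_FLUSH_SEC), so + // sleeping slightly longer than that makes it likely that some mutations have + // been flushed to disk before the diff scan reads them.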
+ if (random.nextInt(operations.size()) == flushInt) { + LOG.info("Waiting for a flush in the middle of applyOperations"); + Thread.sleep(DIFF_FLUSH_SEC + 1); + } + OperationResponse resp = session.apply(op); + if (resp.hasRowError()) { + LOG.error("Could not mutate row: " + resp.getRowError().getErrorStatus()); + } + assertFalse(resp.hasRowError()); + results.put(op.getRow().getInt(0), op.getChangeType()); + } + return results; + } + + /** + * Generates a list of random mutation operations. Any unique row, identified by + * it's key, could have a random number of operations/mutations. However, the + * target count of numInserts, numUpdates and numDeletes will always be achieved + * if the entire list of operations is processed. + * + * @param table the table to generate operations for + * @param numInserts The number of row mutations to end with an insert + * @param numUpdates The number of row mutations to end with an update + * @param numDeletes The number of row mutations to end with an delete + * @return a list of random mutation operations + */ + private List generateMutationOperations( + KuduTable table, int numInserts, int numUpdates, int numDeletes) throws Exception { + + List results = new ArrayList<>(); + List unfinished = new ArrayList<>(); + int minMutationsBound = 5; + + // Generate Operations to initialize all of the row with inserts. + List> changeCounts = Arrays.asList( + new Pair<>(ChangeType.INSERT, numInserts), + new Pair<>(ChangeType.UPDATE, numUpdates), + new Pair<>(ChangeType.DELETE, numDeletes)); + for (Pair changeCount : changeCounts) { + ChangeType type = changeCount.getFirst(); + int count = changeCount.getSecond(); + for (int i = 0; i < count; i++) { + // Generate a random insert. + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + generator.randomizeRow(row); + int key = row.getInt(0); + // Add the insert to the results. + results.add(insert); + // Initialize the unfinished MutationState. + unfinished.add(new MutationState(key, type, random.nextInt(minMutationsBound))); + } + } + + // Randomly pull from the unfinished list, mutate it and add that operation to the results. + // If it has been mutated at least the minimum number of times, remove it from the unfinished + // list. + while (!unfinished.isEmpty()) { + // Get a random row to mutate. + int index = random.nextInt(unfinished.size()); + MutationState state = unfinished.get(index); + + // If the row is done, remove it from unfinished and continue. + if (state.numMutations >= state.minMutations && state.currentType == state.endType) { + unfinished.remove(index); + continue; + } + + // Otherwise, generate an operation to mutate the row based on its current ChangeType. + // insert -> update|delete + // update -> update|delete + // delete -> insert + Operation op; + if (state.currentType == ChangeType.INSERT || state.currentType == ChangeType.UPDATE) { + op = random.nextBoolean() ? table.newUpdate() : table.newDelete(); + } else { + // Must be a delete, so we need an insert next. 
+ op = table.newInsert(); + } + PartialRow row = table.getSchema().newPartialRow(); + row.addInt(0, state.key); + generator.randomizeRow(row, /* randomizeKeys */ false); + op.setRow(row); + results.add(op); + + state.currentType = op.getChangeType(); + state.numMutations++; + } + + return results; + } + + private static class MutationState { + final int key; + final ChangeType endType; + final int minMutations; + + ChangeType currentType = ChangeType.INSERT; + int numMutations = 0; + + MutationState(int key, ChangeType endType, int minMutations) { + this.key = key; + this.endType = endType; + this.minMutations = minMutations; + } + } + + @Test(timeout = 100000) + public void testDiffScanIsDeleted() throws Exception { + Schema schema = new Schema(Arrays.asList( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build() + )); + + KuduTable table = client.createTable(tableName, schema, getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + + // Test a very simple diff scan that should capture one deleted row. + Insert insert = table.newInsert(); + insert.getRow().addInt(0, 0); + session.apply(insert); + long startHT = client.getLastPropagatedTimestamp() + 1; + + Delete delete = table.newDelete(); + delete.getRow().addInt(0, 0); + session.apply(delete); + long endHT = client.getLastPropagatedTimestamp() + 1; + + KuduScanner scanner = client.newScannerBuilder(table) + .diffScan(startHT, endHT) + .build(); + List results = new ArrayList<>(); + for (RowResult row : scanner) { + results.add(row); + } + assertEquals(1, results.size()); + RowResult row = results.get(0); + assertEquals(0, row.getInt(0)); + assertTrue(row.hasIsDeleted()); + assertTrue(row.isDeleted()); + } + + @Test + public void testScannerLeaderChanged() throws Exception { + // Prepare the table for testing. + Schema schema = createManyStringsSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + final int buckets = 2; + createOptions.addHashPartitions(ImmutableList.of("key"), buckets); + createOptions.setNumReplicas(3); + client.createTable(tableName, schema, createOptions); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(tableName); + final int totalRows = 2000; + for (int i = 0; i < totalRows; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", String.format("key_%02d", i)); + row.addString("c1", "c1_" + i); + row.addString("c2", "c2_" + i); + assertEquals(session.apply(insert).hasRowError(), false); + } + AsyncKuduClient asyncClient = harness.getAsyncClient(); + KuduScanner kuduScanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .replicaSelection(ReplicaSelection.LEADER_ONLY) + .batchSizeBytes(100) + .build(); + + // Open the scanner first. + kuduScanner.nextRows(); + final HostAndPort referenceServerHostPort = harness.findLeaderTabletServer( + new LocatedTablet(kuduScanner.currentTablet())); + final String referenceTabletId = kuduScanner.currentTablet().getTabletId(); + + // Send LeaderStepDown request. 
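+ // The `kudu tablet leader_step_down` tool asks the tablet's current leader to step + // down, triggering a leadership change; the open LEADER_ONLY scanner should survive + // the change rather than failing with a "Scanner not found" error.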
+ KuduBinaryLocator.ExecutableInfo exeInfo = KuduBinaryLocator.findBinary("kudu"); + LOG.info(harness.getMasterAddressesAsString()); + List commandLine = Lists.newArrayList(exeInfo.exePath(), + "tablet", + "leader_step_down", + harness.findLeaderMasterServer().toString(), + kuduScanner.currentTablet().getTabletId()); + ProcessBuilder processBuilder = new ProcessBuilder(commandLine); + processBuilder.environment().putAll(exeInfo.environment()); + Process stepDownProcess = processBuilder.start(); + assertEquals(0, stepDownProcess.waitFor()); + + // Wait until the leader changes. + assertEventuallyTrue( + "The leadership should be transferred", + new AssertHelpers.BooleanExpression() { + @Override + public boolean get() throws Exception { + asyncClient.emptyTabletsCacheForTable(table.getTableId()); + List tablets = table.getTabletsLocations(50000); + LocatedTablet targetTablet = null; + for (LocatedTablet tablet : tablets) { + String tabletId = new String(tablet.getTabletId(), UTF_8); + if (tabletId.equals(referenceTabletId)) { + targetTablet = tablet; + } + } + HostAndPort targetHp = harness.findLeaderTabletServer(targetTablet); + return !targetHp.equals(referenceServerHostPort); + } + }, + 10000/*timeoutMillis*/); + + // Simulate that another request(like Batch) has sent to the wrong leader tablet server and + // the change of leadership has been acknowledged. The response will demote the leader. + kuduScanner.currentTablet().demoteLeader( + kuduScanner.currentTablet().getLeaderServerInfo().getUuid()); + asyncClient.emptyTabletsCacheForTable(table.getTableId()); + + int rowsScannedInNextScans = 0; + try { + while (kuduScanner.hasMoreRows()) { + rowsScannedInNextScans += kuduScanner.nextRows().numRows; + } + } catch (Exception ex) { + assertFalse(ex.getMessage().matches(".*Scanner .* not found.*")); + } + assertTrue(rowsScannedInNextScans > 0); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduScannerPrefetching.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduScannerPrefetching.java new file mode 100644 index 0000000000..9383ba9d1f --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduScannerPrefetching.java @@ -0,0 +1,307 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.client.AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS; +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableList; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.test.KuduTestHarness; + +/** + * KUDU-1260: Test Kudu scanner prefetching + * + */ +public class TestKuduScannerPrefetching { + private static final Logger LOG = LoggerFactory.getLogger(ITClient.class); + + private static final String RUNTIME_PROPERTY_NAME = "scannerwithprefetching.runtime.seconds"; + private static final long DEFAULT_RUNTIME_SECONDS = 60; + + // Time we'll spend waiting at the end of the test for things to settle. Also + // the minimum this test can run for. + private static final long TEST_MIN_RUNTIME_SECONDS = 2; + + private static final long TEST_TIMEOUT_SECONDS = 600000; + + private static final String TABLE_NAME = + TestKuduScannerPrefetching.class.getName() + "-" + System.currentTimeMillis(); + + // Tracks whether it's time for the test to end or not. + private CountDownLatch keepRunningLatch; + + // If the test fails, will contain an exception that describes the failure. + private Exception failureException; + + private KuduTable table; + private long runtimeInSeconds; + + private volatile long sharedWriteTimestamp; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() throws Exception { + // Set (or reset, in the event of a retry) test state. + keepRunningLatch = new CountDownLatch(1); + failureException = null; + sharedWriteTimestamp = 0; + + // Extract and verify the test's running time. + String runtimeProp = System.getProperty(RUNTIME_PROPERTY_NAME); + runtimeInSeconds = runtimeProp == null ? DEFAULT_RUNTIME_SECONDS : Long.parseLong(runtimeProp); + if (runtimeInSeconds < TEST_MIN_RUNTIME_SECONDS || runtimeInSeconds > TEST_TIMEOUT_SECONDS) { + Assert.fail("This test needs to run more than " + TEST_MIN_RUNTIME_SECONDS + " seconds" + + " and less than " + TEST_TIMEOUT_SECONDS + " seconds"); + } + LOG.info("Test will run for {} seconds", runtimeInSeconds); + + // Create the test table. 
+ CreateTableOptions builder = new CreateTableOptions().setNumReplicas(3); + builder.setRangePartitionColumns(ImmutableList.of("key")); + table = harness.getClient().createTable(TABLE_NAME, getBasicSchema(), builder); + } + + /** + * Check the scan results for two scanners w/o prefetching + * + * @throws Exception + */ + @Test(timeout = TEST_TIMEOUT_SECONDS) + public void testWithPrefetching() throws Exception { + List threads = new ArrayList<>(); + TestKuduScannerPrefetching.WriterThread wt = new TestKuduScannerPrefetching.WriterThread(); + TestKuduScannerPrefetching.ScannerThread st = new TestKuduScannerPrefetching.ScannerThread(); + threads.add(new Thread(wt, "writer-test-thread")); + threads.add(new Thread(st, "scanner-test-thread")); + for (Thread thread : threads) { + thread.setUncaughtExceptionHandler(new TestKuduScannerPrefetching.UncaughtExceptionHandler()); + thread.start(); + } + + // If we time out here, the test ran to completion and passed. Otherwise, a + // count down was triggered from an error and the test failed. + boolean failure = keepRunningLatch.await(runtimeInSeconds, TimeUnit.SECONDS); + if (!failure) { + // The test passed but the threads are still running; tell them to stop. + keepRunningLatch.countDown(); + } + + for (Thread thread : threads) { + // Give plenty of time for threads to stop. + thread.join(DEFAULT_SLEEP); + } + + if (failure) { + throw failureException; + } + Assert.assertTrue(wt.currentRowKey + " should be higher than 0", wt.currentRowKey > 0); + Assert.assertTrue(st.totalRowCount + " should be higher than 0", st.totalRowCount > 0); + } + + /** + * Logs an error message and triggers the count down latch, stopping this test. + * + * @param message error message to print + * @param exception optional exception to print + */ + private void reportError(String message, Exception exception) { + failureException = new Exception(message, exception); + keepRunningLatch.countDown(); + } + + /** + * Thread that writes sequentially to the table. Every 10 rows it considers setting the flush mode + * to MANUAL_FLUSH or AUTO_FLUSH_SYNC. + */ + class WriterThread implements Runnable { + + private final KuduSession session = harness.getClient().newSession(); + private int currentRowKey = 0; + + @Override + public void run() { + session.setExternalConsistencyMode(ExternalConsistencyMode.CLIENT_PROPAGATED); + while (keepRunningLatch.getCount() > 0) { + try { + OperationResponse resp = session.apply(createBasicSchemaInsert(table, currentRowKey)); + if (hasRowErrorAndReport(resp)) { + return; + } + currentRowKey++; + } catch (Exception e) { + if (keepRunningLatch.getCount() == 0) { + // Likely shutdown() related. + LOG.error("Error occurs: " + e.getMessage()); + return; + } + reportError("Got error while inserting row " + currentRowKey, e); + return; + } + } + LOG.info("Stop writing"); + } + + private boolean hasRowErrorAndReport(OperationResponse resp) { + if (resp != null && resp.hasRowError()) { + reportError("The following RPC " + resp.getOperation().getRow() + + " returned this error: " + resp.getRowError(), null); + return true; + } + + if (resp == null) { + return false; + } + + sharedWriteTimestamp = resp.getWriteTimestampRaw(); + return false; + } + } + + /** + * Thread that scans the table. Alternates randomly between random gets and full table scans. + */ + class ScannerThread implements Runnable { + // Updated by calling a full scan. 
+ private int totalRowCount = 0; + + @Override + public void run() { + while (keepRunningLatch.getCount() > 0) { + boolean shouldContinue = true; + if (sharedWriteTimestamp == 0) { + shouldContinue = true; + } else { + shouldContinue = fullScan(); + } + if (!shouldContinue) { + return; + } + + if (totalRowCount == 0) { + try { + keepRunningLatch.await(50, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // Test is stopping. + return; + } + } + } + } + + /** + * Runs a full table scan and verify the results + * + * @return true if the full scan was successful, false if there was an error + */ + private boolean fullScan() { + int rowCount; + int rowCount2; + TimeoutTracker timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(DEFAULT_SLEEP); + + while (keepRunningLatch.getCount() > 0 && !timeoutTracker.timedOut()) { + long snapshot = sharedWriteTimestamp; + KuduScanner scanner = getSnapshotScannerBuilder(snapshot).prefetching(true).build(); + KuduScanner scannerNoPrefetching = + getSnapshotScannerBuilder(snapshot).prefetching(false).build(); + try { + rowCount = countRowsInScan(scanner); + rowCount2 = countRowsInScan(scannerNoPrefetching); + } catch (KuduException e) { + return checkAndReportError("Got error while row counting", e); + } + Assert.assertEquals(rowCount, rowCount2); + totalRowCount += rowCount; + // Due to the lack of KUDU-430, we need to loop for a while. + try { + keepRunningLatch.await(50, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // No need to do anything, we'll exit the loop once we test getCount() in the condition. + } + } + return !timeoutTracker.timedOut(); + } + + private KuduScanner.KuduScannerBuilder getSnapshotScannerBuilder(long snapshot) { + return harness.getClient().newScannerBuilder(table) + .readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT) + .snapshotTimestampRaw(snapshot) + .batchSizeBytes(128) + .keepAlivePeriodMs(DEFAULT_KEEP_ALIVE_PERIOD_MS) + .setFaultTolerant(false); + } + + /** + * Checks the passed exception contains "Scanner not found". If it does then it returns true, + * else it reports the error and returns false. + * We need to do this because the scans in this client aren't fault tolerant. + * + * @param message message to print if the exception contains a real error + * @param e the exception to check + * @return true if the scanner failed on a non-FATAL error, otherwise false which will kill + * this test + */ + private boolean checkAndReportError(String message, KuduException e) { + // It's possible to get timeouts if we're unlucky. A particularly common one is + // "could not wait for desired snapshot timestamp to be consistent" since we're using + // READ_AT_SNAPSHOT scanners. + // TODO revisit once KUDU-1656 is taken care of. + if (e.getStatus().isTimedOut()) { + LOG.warn("Received a scan timeout", e); + return true; + } + // Do nasty things, expect nasty results. The scanners are a bit too happy to retry TS + // disconnections so we might end up retrying a scanner on a node that restarted, or we might + // get disconnected just after sending an RPC so when we reconnect to the same TS we might get + // the "Invalid call sequence ID" message. 
+ if (!e.getStatus().isNotFound() && + !e.getStatus().getMessage().contains("Invalid call sequence ID")) { + reportError(message, e); + return false; + } + return true; + } + } + + private class UncaughtExceptionHandler implements Thread.UncaughtExceptionHandler { + @Override + public void uncaughtException(Thread t, Throwable e) { + // Only report an error if we're still running, else we'll spam the log. + if (keepRunningLatch.getCount() != 0) { + reportError("Uncaught exception", new Exception(e)); + } + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduSession.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduSession.java new file mode 100644 index 0000000000..723d4eba94 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduSession.java @@ -0,0 +1,930 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.createSchemaWithImmutableColumns; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicTableOptionsWithNonCoveredRange; +import static org.apache.kudu.test.ClientTestUtil.scanTableToStrings; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.List; + +import com.google.common.collect.ImmutableList; +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; + +public class TestKuduSession { + private static final String tableName = "TestKuduSession"; + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + @Test(timeout = 100000) + public void testBasicOps() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + for 
(int i = 0; i < 10; i++) { + session.apply(createInsert(table, i)); + } + assertEquals(10, countRowsInScan(client.newScannerBuilder(table).build())); + + OperationResponse resp = session.apply(createInsert(table, 0)); + assertTrue(resp.hasRowError()); + + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + for (int i = 10; i < 20; i++) { + session.apply(createInsert(table, i)); + } + session.flush(); + assertEquals(20, countRowsInScan(client.newScannerBuilder(table).build())); + } + + @Test(timeout = 100000) + public void testIgnoreAllDuplicateRows() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + session.setIgnoreAllDuplicateRows(true); + for (int i = 0; i < 10; i++) { + session.apply(createInsert(table, i)); + } + // Test all of the various flush modes to be sure we correctly handle errors in + // individual operations and batches. + for (SessionConfiguration.FlushMode mode : SessionConfiguration.FlushMode.values()) { + session.setFlushMode(mode); + for (int i = 0; i < 10; i++) { + OperationResponse resp = session.apply(createInsert(table, i)); + if (mode == SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC) { + assertFalse(resp.hasRowError()); + } + } + if (mode == SessionConfiguration.FlushMode.MANUAL_FLUSH) { + List responses = session.flush(); + for (OperationResponse resp : responses) { + assertFalse(resp.hasRowError()); + } + } else if (mode == SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND) { + while (session.hasPendingOperations()) { + Thread.sleep(100); + } + assertEquals(0, session.countPendingErrors()); + } + } + } + + @Test(timeout = 100000) + public void testIgnoreAllNotFoundRows() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + session.setIgnoreAllNotFoundRows(true); + // Test all of the various flush modes to be sure we correctly handle errors in + // individual operations and batches. + for (SessionConfiguration.FlushMode mode : SessionConfiguration.FlushMode.values()) { + session.setFlushMode(mode); + for (int i = 0; i < 10; i++) { + session.apply(createDelete(table, i)); + OperationResponse resp = session.apply(createInsert(table, i)); + if (mode == SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC) { + assertFalse(resp.hasRowError()); + } + } + if (mode == SessionConfiguration.FlushMode.MANUAL_FLUSH) { + List responses = session.flush(); + for (OperationResponse resp : responses) { + assertFalse(resp.hasRowError()); + } + } else if (mode == SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND) { + while (session.hasPendingOperations()) { + Thread.sleep(100); + } + assertEquals(0, session.countPendingErrors()); + } + } + } + + @Test(timeout = 100000) + public void testBatchWithSameRow() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + // Insert 25 rows, one per batch, along with 50 updates for each, and a delete at the end, + // while also clearing the cache between each batch half the time. The delete is added here + // so that a misplaced update would fail if it happens later than its delete. 
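+ // Emptying the client's tablet location cache between batches forces fresh location + // lookups, so later batches also exercise the re-lookup path.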
+ for (int i = 0; i < 25; i++) { + session.apply(createInsert(table, i)); + for (int j = 0; j < 50; j++) { + Update update = table.newUpdate(); + PartialRow row = update.getRow(); + row.addInt(basicSchema.getColumnByIndex(0).getName(), i); + row.addInt(basicSchema.getColumnByIndex(1).getName(), 1000); + session.apply(update); + } + Delete del = table.newDelete(); + PartialRow row = del.getRow(); + row.addInt(basicSchema.getColumnByIndex(0).getName(), i); + session.apply(del); + session.flush(); + if (i % 2 == 0) { + asyncClient.emptyTabletsCacheForTable(table.getTableId()); + } + } + assertEquals(0, countRowsInScan(client.newScannerBuilder(table).build())); + } + + @Test(timeout = 100000) + public void testDeleteWithFullRow() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + List rows = new ArrayList<>(); + for (int i = 0; i < 25; i++) { + Insert insert = createInsert(table, i); + rows.add(insert.getRow()); + session.apply(insert); + } + session.flush(); + + for (PartialRow row : rows) { + Operation del; + if (row.getInt(0) % 2 == 0) { + del = table.newDelete(); + } else { + del = table.newDeleteIgnore(); + } + del.setRow(row); + session.apply(del); + } + session.flush(); + + assertEquals(0, session.countPendingErrors()); + assertEquals(0, countRowsInScan(client.newScannerBuilder(table).build())); + } + + /** Regression test for KUDU-3198. Delete with full row from a 64-column table. */ + @Test(timeout = 100000) + public void testDeleteWithFullRowFrom64ColumnTable() throws Exception { + ArrayList columns = new ArrayList<>(64); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build()); + for (int i = 1; i < 64; i++) { + columns.add(new ColumnSchema.ColumnSchemaBuilder("column_" + i, Type.STRING) + .nullable(true) + .build()); + } + Schema schema = new Schema(columns); + + KuduTable table = client.createTable(tableName, schema, getBasicCreateTableOptions()); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + // Insert 25 rows and then delete them. + List rows = new ArrayList<>(); + for (int i = 0; i < 25; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt(0, 1); + for (int j = 1; j < 64; j++) { + if (j % 2 == 0) { + row.setNull(j); + } else { + row.addString(j, "val_" + j); + } + } + rows.add(row); + session.apply(insert); + } + session.flush(); + + for (PartialRow row : rows) { + Operation del; + if (row.getInt(0) % 2 == 0) { + del = table.newDelete(); + } else { + del = table.newDeleteIgnore(); + } + del.setRow(row); + session.apply(del); + } + session.flush(); + + assertEquals(0, session.countPendingErrors()); + assertEquals(0, countRowsInScan(client.newScannerBuilder(table).build())); + } + + /** + * Regression test for KUDU-1402. Calls to session.flush() should return an empty list + * instead of null. + * @throws Exception + */ + @Test(timeout = 100000) + public void testEmptyFlush() throws Exception { + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + List result = session.flush(); + assertNotNull(result); + assertTrue(result.isEmpty()); + } + + /** + * Regression test for KUDU-1226. 
Calls to session.flush() concurrent with AUTO_FLUSH_BACKGROUND + * can end up giving ConvertBatchToListOfResponsesCB a list with nulls if a tablet was already + * flushed. Only happens with multiple tablets. + */ + @Test(timeout = 100000) + public void testConcurrentFlushes() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + int numTablets = 4; + int numRowsPerTablet = 100; + + // Create a 4 tablets table split on 1000, 2000, and 3000. + for (int i = 1; i < numTablets; i++) { + PartialRow split = basicSchema.newPartialRow(); + split.addInt(0, i * numRowsPerTablet); + builder.addSplitRow(split); + } + KuduTable table = client.createTable(tableName, basicSchema, builder); + + // Configure the session to background flush as often as it can (every 1ms). + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + session.setFlushInterval(1); + + // Fill each tablet in parallel 1 by 1 then flush. Without the fix this would quickly get an + // NPE. + for (int i = 0; i < numRowsPerTablet; i++) { + for (int j = 0; j < numTablets; j++) { + session.apply(createInsert(table, i + (numRowsPerTablet * j))); + } + session.flush(); + } + } + + @Test(timeout = 10000) + public void testOverWritingValues() throws Exception { + final KuduTable table = + client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + final KuduSession session = client.newSession(); + Insert insert = createInsert(table, 0); + PartialRow row = insert.getRow(); + + // Overwrite all the normal columns. + int magicNumber = 9999; + row.addInt(1, magicNumber); + row.addInt(2, magicNumber); + row.addBoolean(4, false); + // Spam the string column since it's backed by an array. + for (int i = 0; i <= magicNumber; i++) { + row.addString(3, i + ""); + } + // We're supposed to keep a constant size. + assertEquals(5, row.getVarLengthData().size()); + session.apply(insert); + + KuduScanner scanner = client.newScannerBuilder(table).build(); + RowResult rr = scanner.nextRows().next(); + assertEquals(magicNumber, rr.getInt(1)); + assertEquals(magicNumber, rr.getInt(2)); + assertEquals(magicNumber + "", rr.getString(3)); + assertEquals(false, rr.getBoolean(4)); + + // Test setting a value post-apply. + try { + row.addInt(1, 0); + fail("Row should be frozen and throw"); + } catch (IllegalStateException ex) { + // Ok. 
+ } + } + + private void doVerifyMetrics(KuduSession session, + long successfulInserts, + long insertIgnoreErrors, + long successfulUpserts, + long upsertIgnoreErrors, + long successfulUpdates, + long updateIgnoreErrors, + long successfulDeletes, + long deleteIgnoreErrors) { + ResourceMetrics metrics = session.getWriteOpMetrics(); + assertEquals(successfulInserts, metrics.getMetric("successful_inserts")); + assertEquals(insertIgnoreErrors, metrics.getMetric("insert_ignore_errors")); + assertEquals(successfulUpserts, metrics.getMetric("successful_upserts")); + assertEquals(upsertIgnoreErrors, metrics.getMetric("upsert_ignore_errors")); + assertEquals(successfulUpdates, metrics.getMetric("successful_updates")); + assertEquals(updateIgnoreErrors, metrics.getMetric("update_ignore_errors")); + assertEquals(successfulDeletes, metrics.getMetric("successful_deletes")); + assertEquals(deleteIgnoreErrors, metrics.getMetric("delete_ignore_errors")); + } + + @Test(timeout = 10000) + public void testUpsert() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + // Test an Upsert that acts as an Insert. + assertFalse(session.apply(createUpsert(table, 1, 1, false)).hasRowError()); + + List rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals( + "INT32 key=1, INT32 column1_i=1, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true", + rowStrings.get(0)); + doVerifyMetrics(session, 0, 0, 1, 0, 0, 0, 0, 0); + + // Test an Upsert that acts as an Update. + assertFalse(session.apply(createUpsert(table, 1, 2, false)).hasRowError()); + rowStrings = scanTableToStrings(table); + assertEquals( + "INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true", + rowStrings.get(0)); + doVerifyMetrics(session, 0, 0, 2, 0, 0, 0, 0, 0); + } + + @Test(timeout = 10000) + public void testInsertIgnoreAfterInsertHasNoRowError() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + session.apply(createInsert(table, 1)); + session.apply(createUpsert(table, 1, 1, false)); + session.apply(createInsertIgnore(table, 1)); + List results = session.flush(); + doVerifyMetrics(session, 1, 1, 1, 0, 0, 0, 0, 0); + for (OperationResponse result : results) { + assertFalse(result.toString(), result.hasRowError()); + } + List rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals( + "INT32 key=1, INT32 column1_i=1, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true", + rowStrings.get(0)); + } + + @Test(timeout = 10000) + public void testInsertAfterInsertIgnoreHasRowError() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + session.apply(createInsertIgnore(table, 1)); + session.apply(createInsert(table, 1)); + List results = session.flush(); + doVerifyMetrics(session, 1, 0, 0, 0, 0, 0, 0, 0); + assertFalse(results.get(0).toString(), results.get(0).hasRowError()); + assertTrue(results.get(1).toString(), results.get(1).hasRowError()); + assertTrue(results.get(1).getRowError().getErrorStatus().isAlreadyPresent()); + List 
rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals( + "INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true", + rowStrings.get(0)); + } + + @Test(timeout = 10000) + public void testInsertIgnore() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + // Test insert ignore implements normal insert. + assertFalse(session.apply(createInsertIgnore(table, 1)).hasRowError()); + List rowStrings = scanTableToStrings(table); + assertEquals( + "INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true", + rowStrings.get(0)); + doVerifyMetrics(session, 1, 0, 0, 0, 0, 0, 0, 0); + + // Test insert ignore does not return a row error. + assertFalse(session.apply(createInsertIgnore(table, 1)).hasRowError()); + rowStrings = scanTableToStrings(table); + assertEquals( + "INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true", + rowStrings.get(0)); + doVerifyMetrics(session, 1, 1, 0, 0, 0, 0, 0, 0); + + } + + @Test(timeout = 10000) + public void testUpdateIgnore() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + // Test update ignore does not return a row error. + assertFalse(session.apply(createUpdateIgnore(table, 1, 1, false)).hasRowError()); + assertEquals(0, scanTableToStrings(table).size()); + doVerifyMetrics(session, 0, 0, 0, 0, 0, 1, 0, 0); + + assertFalse(session.apply(createInsert(table, 1)).hasRowError()); + assertEquals(1, scanTableToStrings(table).size()); + doVerifyMetrics(session, 1, 0, 0, 0, 0, 1, 0, 0); + + // Test update ignore implements normal update. + assertFalse(session.apply(createUpdateIgnore(table, 1, 2, false)).hasRowError()); + List rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals( + "INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true", + rowStrings.get(0)); + doVerifyMetrics(session, 1, 0, 0, 0, 1, 1, 0, 0); + } + + @Test(timeout = 10000) + public void testDeleteIgnore() throws Exception { + KuduTable table = client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + // Test delete ignore does not return a row error. + assertFalse(session.apply(createDeleteIgnore(table, 1)).hasRowError()); + doVerifyMetrics(session, 0, 0, 0, 0, 0, 0, 0, 1); + + assertFalse(session.apply(createInsert(table, 1)).hasRowError()); + assertEquals(1, scanTableToStrings(table).size()); + doVerifyMetrics(session, 1, 0, 0, 0, 0, 0, 0, 1); + + // Test delete ignore implements normal delete. 
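+ // The row inserted above is removed and successful_deletes increments; the delete_ignore_errors
+ // count from the earlier no-op delete stays at 1.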
+ assertFalse(session.apply(createDeleteIgnore(table, 1)).hasRowError()); + assertEquals(0, scanTableToStrings(table).size()); + doVerifyMetrics(session, 1, 0, 0, 0, 0, 0, 1, 1); + } + + @Test(timeout = 10000) + public void testInsertManualFlushNonCoveredRange() throws Exception { + CreateTableOptions createOptions = getBasicTableOptionsWithNonCoveredRange(); + createOptions.setNumReplicas(1); + client.createTable(tableName, basicSchema, createOptions); + KuduTable table = client.openTable(tableName); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + // Insert in reverse sorted order so that more table location lookups occur + // (the extra results in table location lookups always occur past the inserted key). + List nonCoveredKeys = ImmutableList.of(350, 300, 199, 150, 100, -1, -50); + for (int key : nonCoveredKeys) { + assertNull(session.apply(createBasicSchemaInsert(table, key))); + } + List results = session.flush(); + assertEquals(nonCoveredKeys.size(), results.size()); + for (OperationResponse result : results) { + assertTrue(result.hasRowError()); + assertTrue(result.getRowError().getErrorStatus().isNotFound()); + } + + // Insert a batch of some valid and some invalid. + for (int key = 90; key < 110; key++) { + session.apply(createBasicSchemaInsert(table, key)); + } + results = session.flush(); + + int failures = 0; + for (OperationResponse result : results) { + if (result.hasRowError()) { + failures++; + assertTrue(result.getRowError().getErrorStatus().isNotFound()); + } + } + assertEquals(10, failures); + } + + @Test(timeout = 10000) + public void testInsertManualFlushResponseOrder() throws Exception { + CreateTableOptions createOptions = getBasicTableOptionsWithNonCoveredRange(); + createOptions.setNumReplicas(1); + client.createTable(tableName, basicSchema, createOptions); + KuduTable table = client.openTable(tableName); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + // Insert a batch of some valid and some invalid. 
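+ // Keys 100-190 land in the non-covered range and should come back as NotFound errors, while
+ // keys 200-290 succeed; the flush responses must preserve the order the operations were applied in.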
+ for (int i = 0; i < 10; i++) { + assertNull(session.apply(createBasicSchemaInsert(table, 100 + i * 10))); + assertNull(session.apply(createBasicSchemaInsert(table, 200 + i * 10))); + } + List results = session.flush(); + + assertEquals(20, results.size()); + + for (int i = 0; i < 20; i++) { + OperationResponse result = results.get(i); + if (i % 2 == 0) { + assertTrue(result.hasRowError()); + assertTrue(result.getRowError().getErrorStatus().isNotFound()); + } else { + assertFalse(result.hasRowError()); + } + } + } + + @Test(timeout = 10000) + public void testNonCoveredRangeException() throws Exception { + CreateTableOptions createOptions = getBasicTableOptionsWithNonCoveredRange(); + createOptions.setNumReplicas(1); + client.createTable(tableName, basicSchema, createOptions); + KuduTable table = client.openTable(tableName); + Insert insert = createInsert(table, 150); + + //AUTO_FLUSH_SYNC case + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + OperationResponse apply = session.apply(insert); + assertTrue(apply.hasRowError()); + System.err.println(apply.getRowError().getErrorStatus().getMessage()); + assertTrue(apply.getRowError().getErrorStatus().getMessage().contains( + "does not exist in table: TestKuduSession")); + //AUTO_FLUSH_BACKGROUND case + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + assertEquals(null, session.apply(insert)); + List autoFlushResult = session.flush(); + assertEquals(1, autoFlushResult.size()); + OperationResponse responseAuto = autoFlushResult.get(0); + assertTrue(responseAuto.hasRowError()); + assertTrue(responseAuto.getRowError().getErrorStatus().getMessage().contains( + "does not exist in table: TestKuduSession")); + //MANUAL_FLUSH case + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + assertEquals(null, session.apply(insert)); + List manualFlushResult = session.flush(); + assertEquals(1, manualFlushResult.size()); + OperationResponse responseManual = manualFlushResult.get(0); + assertTrue(responseManual.hasRowError()); + assertTrue(responseManual.getRowError().getErrorStatus().getMessage().contains( + "does not exist in table: TestKuduSession")); + } + + @Test(timeout = 10000) + public void testInsertAutoFlushSyncNonCoveredRange() throws Exception { + CreateTableOptions createOptions = getBasicTableOptionsWithNonCoveredRange(); + createOptions.setNumReplicas(1); + client.createTable(tableName, basicSchema, createOptions); + KuduTable table = client.openTable(tableName); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + + List nonCoveredKeys = ImmutableList.of(350, 300, 199, 150, 100, -1, -50); + for (int key : nonCoveredKeys) { + OperationResponse response = session.apply(createBasicSchemaInsert(table, key)); + assertTrue(response.hasRowError()); + assertTrue(response.getRowError().getErrorStatus().isNotFound()); + } + } + + @Test(timeout = 10000) + public void testInsertAutoFlushBackgroundNonCoveredRange() throws Exception { + CreateTableOptions createOptions = getBasicTableOptionsWithNonCoveredRange(); + createOptions.setNumReplicas(1); + client.createTable(tableName, basicSchema, createOptions); + KuduTable table = client.openTable(tableName); + + AsyncKuduSession session = asyncClient.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + + List nonCoveredKeys = ImmutableList.of(350, 300, 199, 150, 100, -1, -50); + for (int key 
: nonCoveredKeys) { + OperationResponse result = session.apply(createBasicSchemaInsert(table, key)).join(5000); + assertTrue(result.hasRowError()); + assertTrue(result.getRowError().getErrorStatus().isNotFound()); + } + + RowErrorsAndOverflowStatus errors = session.getPendingErrors(); + assertEquals(nonCoveredKeys.size(), errors.getRowErrors().length); + for (RowError error : errors.getRowErrors()) { + assertTrue(error.getErrorStatus().isNotFound()); + } + + // Insert a batch of some valid and some invalid. + for (int key = 90; key < 110; key++) { + session.apply(createBasicSchemaInsert(table, key)); + } + session.flush().join(5000); + + errors = session.getPendingErrors(); + assertEquals(10, errors.getRowErrors().length); + for (RowError error : errors.getRowErrors()) { + assertTrue(error.getErrorStatus().isNotFound()); + } + } + + @Test(timeout = 10000) + public void testUpdateOnTableWithImmutableColumn() throws Exception { + // Create a table with an immutable column. + KuduTable table = client.createTable( + tableName, createSchemaWithImmutableColumns(), getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + // Insert some data and verify it. + assertFalse(session.apply(createInsertOnTableWithImmutableColumn(table, 1)).hasRowError()); + List rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals("INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true, INT32 column5_i=6", + rowStrings.get(0)); + // successfulInserts++ + doVerifyMetrics(session, 1, 0, 0, 0, 0, 0, 0, 0); + + // Test an Update can update row without immutable column set. + final String expectRow = "INT32 key=1, INT32 column1_i=3, INT32 column2_i=3, " + + "STRING column3_s=NULL, BOOL column4_b=true, INT32 column5_i=6"; + assertFalse(session.apply(createUpdateOnTableWithImmutableColumn( + table, 1, false)).hasRowError()); + rowStrings = scanTableToStrings(table); + assertEquals(expectRow, rowStrings.get(0)); + // successfulUpdates++ + doVerifyMetrics(session, 1, 0, 0, 0, 1, 0, 0, 0); + + // Test an Update results in an error when attempting to update row having at least + // one column with the immutable attribute set. + OperationResponse resp = session.apply(createUpdateOnTableWithImmutableColumn( + table, 1, true)); + assertTrue(resp.hasRowError()); + assertTrue(resp.getRowError().getErrorStatus().isImmutable()); + Assert.assertThat(resp.getRowError().getErrorStatus().toString(), + CoreMatchers.containsString("Immutable: UPDATE not allowed for " + + "immutable column: column5_i INT32 NULLABLE IMMUTABLE")); + + // nothing changed + rowStrings = scanTableToStrings(table); + assertEquals(expectRow, rowStrings.get(0)); + doVerifyMetrics(session, 1, 0, 0, 0, 1, 0, 0, 0); + } + + @Test(timeout = 10000) + public void testUpdateIgnoreOnTableWithImmutableColumn() throws Exception { + // Create a table with an immutable column. + KuduTable table = client.createTable( + tableName, createSchemaWithImmutableColumns(), getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + // Insert some data and verify it. 
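+ // column5_i carries the IMMUTABLE attribute in this schema: it can be set on insert, but later
+ // attempts to change it either fail or are dropped and counted by the IGNORE variants.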
+ assertFalse(session.apply(createInsertOnTableWithImmutableColumn(table, 1)).hasRowError()); + List rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals("INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=a string, BOOL column4_b=true, INT32 column5_i=6", + rowStrings.get(0)); + // successfulInserts++ + doVerifyMetrics(session, 1, 0, 0, 0, 0, 0, 0, 0); + + final String expectRow = "INT32 key=1, INT32 column1_i=3, INT32 column2_i=3, " + + "STRING column3_s=NULL, BOOL column4_b=true, INT32 column5_i=6"; + + // Test an UpdateIgnore can update a row without changing the immutable column cell, + // the error of updating the immutable column will be ignored. + assertFalse(session.apply(createUpdateIgnoreOnTableWithImmutableColumn( + table, 1, true)).hasRowError()); + rowStrings = scanTableToStrings(table); + assertEquals(expectRow, rowStrings.get(0)); + // successfulUpdates++, updateIgnoreErrors++ + doVerifyMetrics(session, 1, 0, 0, 0, 1, 1, 0, 0); + + // Test an UpdateIgnore only on immutable column. Note that this will result in + // a 'Invalid argument: No fields updated' error. + OperationResponse resp = session.apply(createUpdateIgnoreOnTableWithImmutableColumn( + table, 1, false)); + assertTrue(resp.hasRowError()); + assertTrue(resp.getRowError().getErrorStatus().isInvalidArgument()); + Assert.assertThat(resp.getRowError().getErrorStatus().toString(), + CoreMatchers.containsString("Invalid argument: No fields updated, " + + "key is: (int32 key=)")); + + // nothing changed + rowStrings = scanTableToStrings(table); + assertEquals(expectRow, rowStrings.get(0)); + doVerifyMetrics(session, 1, 0, 0, 0, 1, 1, 0, 0); + } + + @Test(timeout = 10000) + public void testUpsertIgnoreOnTableWithImmutableColumn() throws Exception { + // Create a table with an immutable column. + KuduTable table = client.createTable( + tableName, createSchemaWithImmutableColumns(), getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + // Insert some data and verify it. + assertFalse(session.apply(createUpsertIgnoreOnTableWithImmutableColumn( + table, 1, 2, true)).hasRowError()); + List rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals("INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=NULL, BOOL column4_b=true, INT32 column5_i=4", + rowStrings.get(0)); + // successfulUpserts++ + doVerifyMetrics(session, 0, 0, 1, 0, 0, 0, 0, 0); + + // Test an UpsertIgnore can update row without immutable column set. + assertFalse(session.apply(createUpsertIgnoreOnTableWithImmutableColumn( + table, 1, 3, false)).hasRowError()); + rowStrings = scanTableToStrings(table); + assertEquals("INT32 key=1, INT32 column1_i=3, INT32 column2_i=3, " + + "STRING column3_s=NULL, BOOL column4_b=true, INT32 column5_i=4", rowStrings.get(0)); + // successfulUpserts++ + doVerifyMetrics(session, 0, 0, 2, 0, 0, 0, 0, 0); + + // Test an UpsertIgnore can update row with immutable column set. 
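+ // The mutable columns are updated while the attempted write to the immutable column is dropped,
+ // so both successful_upserts and upsert_ignore_errors increment below.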
+ assertFalse(session.apply(createUpsertIgnoreOnTableWithImmutableColumn( + table, 1, 4, true)).hasRowError()); + rowStrings = scanTableToStrings(table); + assertEquals("INT32 key=1, INT32 column1_i=4, INT32 column2_i=3, " + + "STRING column3_s=NULL, BOOL column4_b=true, INT32 column5_i=4", rowStrings.get(0)); + // successfulUpserts++, upsertIgnoreErrors++ + doVerifyMetrics(session, 0, 0, 3, 1, 0, 0, 0, 0); + } + + @Test(timeout = 10000) + public void testUpsertOnTableWithImmutableColumn() throws Exception { + // Create a table with an immutable column. + KuduTable table = client.createTable( + tableName, createSchemaWithImmutableColumns(), getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + + final String expectRow = "INT32 key=1, INT32 column1_i=2, INT32 column2_i=3, " + + "STRING column3_s=NULL, BOOL column4_b=true, INT32 column5_i=4"; + // Insert some data and verify it. + assertFalse(session.apply(createUpsertOnTableWithImmutableColumn( + table, 1, 2, true)).hasRowError()); + List rowStrings = scanTableToStrings(table); + assertEquals(1, rowStrings.size()); + assertEquals(expectRow, rowStrings.get(0)); + // successfulUpserts++ + doVerifyMetrics(session, 0, 0, 1, 0, 0, 0, 0, 0); + + // Test an Upsert attemp to update an immutable column, which will result an error. + OperationResponse resp = session.apply(createUpsertOnTableWithImmutableColumn( + table, 1, 3, true)); + assertTrue(resp.hasRowError()); + assertTrue(resp.getRowError().getErrorStatus().isImmutable()); + Assert.assertThat(resp.getRowError().getErrorStatus().toString(), + CoreMatchers.containsString("Immutable: UPDATE not allowed for " + + "immutable column: column5_i INT32 NULLABLE IMMUTABLE")); + + // nothing changed + rowStrings = scanTableToStrings(table); + assertEquals(expectRow, rowStrings.get(0)); + doVerifyMetrics(session, 0, 0, 1, 0, 0, 0, 0, 0); + } + + private Insert createInsert(KuduTable table, int key) { + return createBasicSchemaInsert(table, key); + } + + private Insert createInsertOnTableWithImmutableColumn(KuduTable table, int key) { + Insert insert = createBasicSchemaInsert(table, key); + insert.getRow().addInt(5, 6); + return insert; + } + + private Update createUpdateOnTableWithImmutableColumn(KuduTable table, int key, + boolean updateImmutableColumn) { + Update update = table.newUpdate(); + populateUpdateRow(update.getRow(), key, key * 3, true); + if (updateImmutableColumn) { + update.getRow().addInt(5, 6); + } + + return update; + } + + private UpdateIgnore createUpdateIgnoreOnTableWithImmutableColumn( + KuduTable table, int key, boolean updateNonImmutableColumns) { + UpdateIgnore updateIgnore = table.newUpdateIgnore(); + if (updateNonImmutableColumns) { + populateUpdateRow(updateIgnore.getRow(), key, key * 3, true); + } else { + updateIgnore.getRow().addInt(0, key); + } + updateIgnore.getRow().addInt(5, 6); + + return updateIgnore; + } + + private UpsertIgnore createUpsertIgnoreOnTableWithImmutableColumn( + KuduTable table, int key, int times, boolean updateImmutableColumn) { + UpsertIgnore upsertIgnore = table.newUpsertIgnore(); + populateUpdateRow(upsertIgnore.getRow(), key, key * times, true); + if (updateImmutableColumn) { + upsertIgnore.getRow().addInt(5, key * times * 2); + } + + return upsertIgnore; + } + + private Upsert createUpsertOnTableWithImmutableColumn( + KuduTable table, int key, int times, boolean updateImmutableColumn) { + Upsert upsert = table.newUpsert(); + populateUpdateRow(upsert.getRow(), key, key * times, true); + if (updateImmutableColumn) { + 
upsert.getRow().addInt(5, key * times * 2); + } + + return upsert; + } + + private Upsert createUpsert(KuduTable table, int key, int secondVal, boolean hasNull) { + Upsert upsert = table.newUpsert(); + populateUpdateRow(upsert.getRow(), key, secondVal, hasNull); + return upsert; + } + + private UpdateIgnore createUpdateIgnore(KuduTable table, int key, int secondVal, + boolean hasNull) { + UpdateIgnore updateIgnore = table.newUpdateIgnore(); + populateUpdateRow(updateIgnore.getRow(), key, secondVal, hasNull); + return updateIgnore; + } + + private void populateUpdateRow(PartialRow row, int key, int secondVal, boolean hasNull) { + row.addInt(0, key); + row.addInt(1, secondVal); + row.addInt(2, 3); + if (hasNull) { + row.setNull(3); + } else { + row.addString(3, "a string"); + } + row.addBoolean(4, true); + } + + private Delete createDelete(KuduTable table, int key) { + Delete delete = table.newDelete(); + PartialRow row = delete.getRow(); + row.addInt(0, key); + return delete; + } + + private DeleteIgnore createDeleteIgnore(KuduTable table, int key) { + DeleteIgnore deleteIgnore = table.newDeleteIgnore(); + PartialRow row = deleteIgnore.getRow(); + row.addInt(0, key); + return deleteIgnore; + } + + protected InsertIgnore createInsertIgnore(KuduTable table, int key) { + InsertIgnore insertIgnore = table.newInsertIgnore(); + PartialRow row = insertIgnore.getRow(); + row.addInt(0, key); + row.addInt(1, 2); + row.addInt(2, 3); + row.addString(3, "a string"); + row.addBoolean(4, true); + return insertIgnore; + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTable.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTable.java new file mode 100644 index 0000000000..035b68a183 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTable.java @@ -0,0 +1,2684 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.EQUAL; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.GREATER; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.GREATER_EQUAL; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.LESS; +import static org.apache.kudu.client.KuduPredicate.ComparisonOp.LESS_EQUAL; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.createSchemaWithImmutableColumns; +import static org.apache.kudu.test.ClientTestUtil.createSchemaWithNonUniqueKey; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.ClientTestUtil.getBasicTableOptionsWithNonCoveredRange; +import static org.apache.kudu.test.ClientTestUtil.scanTableToStrings; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.util.Pair; + +public class TestKuduTable { + private static final Logger LOG = LoggerFactory.getLogger(TestKuduTable.class); + + private static final Schema BASIC_SCHEMA = getBasicSchema(); + private static final String tableName = "TestKuduTable"; + + private static final Schema basicSchema = ClientTestUtil.getBasicSchema(); + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + @Test(timeout = 100000) + public void testAlterColumn() throws Exception { + // Used a simplified schema because BASIC_SCHEMA has extra columns that make the asserts + // verbose. + List columns = ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING) + .nullable(true) + .desiredBlockSize(4096) + .encoding(ColumnSchema.Encoding.PLAIN_ENCODING) + .compressionAlgorithm(ColumnSchema.CompressionAlgorithm.NO_COMPRESSION) + .build()); + KuduTable table = + client.createTable(tableName, new Schema(columns), getBasicCreateTableOptions()); + KuduSession session = client.newSession(); + // Insert a row before a default is defined and check the value is NULL. 
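+ // Column defaults apply only to rows written after the default is (re)defined; existing rows
+ // keep their stored values, which is what the scans below verify.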
+ insertDefaultRow(table, session, 0); + //ClientTestUtil.scanTa + List rows = scanTableToStrings(table); + assertEquals("wrong number of rows", 1, rows.size()); + assertEquals("wrong row", "INT32 key=0, STRING value=NULL", rows.get(0)); + + // Add a default, checking new rows see the new default and old rows remain the same. + client.alterTable(tableName, new AlterTableOptions().changeDefault("value", "pizza")); + + insertDefaultRow(table, session, 1); + rows = scanTableToStrings(table); + assertEquals("wrong number of rows", 2, rows.size()); + assertEquals("wrong row", "INT32 key=0, STRING value=NULL", rows.get(0)); + assertEquals("wrong row", "INT32 key=1, STRING value=pizza", rows.get(1)); + + // Change the default, checking new rows see the new default and old rows remain the same. + client.alterTable(tableName, new AlterTableOptions().changeDefault("value", "taco")); + + insertDefaultRow(table, session, 2); + + rows = scanTableToStrings(table); + assertEquals("wrong number of rows", 3, rows.size()); + assertEquals("wrong row", "INT32 key=0, STRING value=NULL", rows.get(0)); + assertEquals("wrong row", "INT32 key=1, STRING value=pizza", rows.get(1)); + assertEquals("wrong row", "INT32 key=2, STRING value=taco", rows.get(2)); + + // Remove the default, checking that new rows default to NULL and old rows remain the same. + client.alterTable(tableName, new AlterTableOptions().removeDefault("value")); + + insertDefaultRow(table, session, 3); + + rows = scanTableToStrings(table); + assertEquals("wrong number of rows", 4, rows.size()); + assertEquals("wrong row", "INT32 key=0, STRING value=NULL", rows.get(0)); + assertEquals("wrong row", "INT32 key=1, STRING value=pizza", rows.get(1)); + assertEquals("wrong row", "INT32 key=2, STRING value=taco", rows.get(2)); + assertEquals("wrong row", "INT32 key=3, STRING value=NULL", rows.get(3)); + + // Change the column storage attributes. + assertEquals("wrong block size", + 4096, + table.getSchema().getColumn("value").getDesiredBlockSize()); + assertEquals("wrong encoding", + ColumnSchema.Encoding.PLAIN_ENCODING, + table.getSchema().getColumn("value").getEncoding()); + assertEquals("wrong compression algorithm", + ColumnSchema.CompressionAlgorithm.NO_COMPRESSION, + table.getSchema().getColumn("value").getCompressionAlgorithm()); + + client.alterTable(tableName, new AlterTableOptions() + .changeDesiredBlockSize("value", 8192) + .changeEncoding("value", ColumnSchema.Encoding.DICT_ENCODING) + .changeCompressionAlgorithm("value", ColumnSchema.CompressionAlgorithm.SNAPPY)); + + KuduTable reopenedTable = client.openTable(tableName); + assertEquals("wrong block size post alter", + 8192, + reopenedTable.getSchema().getColumn("value").getDesiredBlockSize()); + assertEquals("wrong encoding post alter", + ColumnSchema.Encoding.DICT_ENCODING, + reopenedTable.getSchema().getColumn("value").getEncoding()); + assertEquals("wrong compression algorithm post alter", + ColumnSchema.CompressionAlgorithm.SNAPPY, + reopenedTable.getSchema().getColumn("value").getCompressionAlgorithm()); + } + + private void insertDefaultRow(KuduTable table, KuduSession session, int key) + throws Exception { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt("key", key); + // Omit value. + session.apply(insert); + } + + @Test(timeout = 100000) + public void testAlterTable() throws Exception { + client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + try { + + // Add a col. 
+ client.alterTable(tableName, + new AlterTableOptions().addColumn("testaddint", Type.INT32, 4)); + + // Rename that col. + client.alterTable(tableName, + new AlterTableOptions().renameColumn("testaddint", "newtestaddint")); + + // Delete it. + client.alterTable(tableName, new AlterTableOptions().dropColumn("newtestaddint")); + + String newTableName = tableName + "new"; + + // Rename our table. + client.alterTable(tableName, new AlterTableOptions().renameTable(newTableName)); + + // Rename it back. + client.alterTable(newTableName, new AlterTableOptions().renameTable(tableName)); + + // Add 3 columns, where one has default value, nullable and Timestamp with default value + client.alterTable(tableName, new AlterTableOptions() + .addColumn("testaddmulticolnotnull", Type.INT32, 4) + .addNullableColumn("testaddmulticolnull", Type.STRING) + .addColumn("testaddmulticolTimestampcol", Type.UNIXTIME_MICROS, + (System.currentTimeMillis() * 1000))); + + // Try altering a table that doesn't exist. + String nonExistingTableName = "table_does_not_exist"; + try { + client.alterTable(nonExistingTableName, new AlterTableOptions()); + fail("Shouldn't be able to alter a table that doesn't exist"); + } catch (KuduException ex) { + assertTrue(ex.getStatus().isNotFound()); + } + + try { + client.isAlterTableDone(nonExistingTableName); + fail("Shouldn't be able to query if an alter table is done here"); + } catch (KuduException ex) { + assertTrue(ex.getStatus().isNotFound()); + } + } finally { + // Normally Java tests accumulate tables without issue, deleting them all + // when shutting down the mini cluster at the end of every test class. + // However, testGetLocations below expects a certain table count, so + // we'll delete our table to ensure there's no interaction between them. + client.deleteTable(tableName); + } + } + + /** + * Test creating tables of different sizes and see that we get the correct number of tablets back. 
+ */ + @Test + @SuppressWarnings("deprecation") + public void testGetLocations() throws Exception { + final int initialTableCount = + asyncClient.getTablesList().join(DEFAULT_SLEEP).getTablesList().size(); + + final String NON_EXISTENT_TABLE = "NON_EXISTENT_TABLE"; + + // Test a non-existing table + try { + client.openTable(NON_EXISTENT_TABLE); + fail("Should receive an exception since the table doesn't exist"); + } catch (Exception ex) { + // expected + } + // Test with defaults + String tableWithDefault = tableName + "-WithDefault"; + CreateTableOptions builder = getBasicCreateTableOptions(); + List columns = new ArrayList<>(BASIC_SCHEMA.getColumnCount()); + int defaultInt = 30; + String defaultString = "data"; + for (ColumnSchema columnSchema : BASIC_SCHEMA.getColumns()) { + + Object defaultValue; + + if (columnSchema.getType() == Type.INT32) { + defaultValue = defaultInt; + } else if (columnSchema.getType() == Type.BOOL) { + defaultValue = true; + } else { + defaultValue = defaultString; + } + columns.add( + new ColumnSchema.ColumnSchemaBuilder(columnSchema.getName(), columnSchema.getType()) + .key(columnSchema.isKey()) + .nullable(columnSchema.isNullable()) + .defaultValue(defaultValue).build()); + } + Schema schemaWithDefault = new Schema(columns); + KuduTable kuduTable = client.createTable(tableWithDefault, schemaWithDefault, builder); + assertEquals(defaultInt, kuduTable.getSchema().getColumnByIndex(0).getDefaultValue()); + assertEquals(defaultString, + kuduTable.getSchema().getColumnByIndex(columns.size() - 2).getDefaultValue()); + assertEquals(true, + kuduTable.getSchema().getColumnByIndex(columns.size() - 1).getDefaultValue()); + + // Make sure the table's schema includes column IDs. + assertTrue(kuduTable.getSchema().hasColumnIds()); + + // Test we can open a table that was already created. 
+ client.openTable(tableWithDefault); + + String splitTablePrefix = tableName + "-Splits"; + // Test splitting and reading those splits + KuduTable kuduTableWithoutDefaults = createTableWithSplitsAndTest(splitTablePrefix, 0); + // finish testing read defaults + assertNull(kuduTableWithoutDefaults.getSchema().getColumnByIndex(0).getDefaultValue()); + createTableWithSplitsAndTest(splitTablePrefix, 3); + createTableWithSplitsAndTest(splitTablePrefix, 10); + + KuduTable table = createTableWithSplitsAndTest(splitTablePrefix, 30); + + List tablets = + table.getTabletsLocations(null, getKeyInBytes(9), DEFAULT_SLEEP); + assertEquals(9, tablets.size()); + assertEquals(9, + table.asyncGetTabletsLocations(null, getKeyInBytes(9), DEFAULT_SLEEP).join().size()); + + tablets = table.getTabletsLocations(getKeyInBytes(0), getKeyInBytes(9), DEFAULT_SLEEP); + assertEquals(9, tablets.size()); + assertEquals(9, + table.asyncGetTabletsLocations(getKeyInBytes(0), + getKeyInBytes(9), DEFAULT_SLEEP).join().size()); + + tablets = table.getTabletsLocations(getKeyInBytes(5), getKeyInBytes(9), DEFAULT_SLEEP); + assertEquals(4, tablets.size()); + assertEquals(4, + table.asyncGetTabletsLocations(getKeyInBytes(5), + getKeyInBytes(9), DEFAULT_SLEEP).join().size()); + + tablets = table.getTabletsLocations(getKeyInBytes(5), getKeyInBytes(14), DEFAULT_SLEEP); + assertEquals(9, tablets.size()); + assertEquals(9, + table.asyncGetTabletsLocations(getKeyInBytes(5), + getKeyInBytes(14), DEFAULT_SLEEP).join().size()); + + tablets = table.getTabletsLocations(getKeyInBytes(5), getKeyInBytes(31), DEFAULT_SLEEP); + assertEquals(26, tablets.size()); + assertEquals(26, + table.asyncGetTabletsLocations(getKeyInBytes(5), + getKeyInBytes(31), DEFAULT_SLEEP).join().size()); + + tablets = table.getTabletsLocations(getKeyInBytes(5), null, DEFAULT_SLEEP); + assertEquals(26, tablets.size()); + assertEquals(26, + table.asyncGetTabletsLocations(getKeyInBytes(5), null, DEFAULT_SLEEP).join().size()); + + tablets = table.getTabletsLocations(null, getKeyInBytes(10000), DEFAULT_SLEEP); + assertEquals(31, tablets.size()); + assertEquals(31, + table.asyncGetTabletsLocations(null, + getKeyInBytes(10000), DEFAULT_SLEEP).join().size()); + + tablets = table.getTabletsLocations(getKeyInBytes(20), getKeyInBytes(10000), DEFAULT_SLEEP); + assertEquals(11, tablets.size()); + assertEquals(11, + table.asyncGetTabletsLocations(getKeyInBytes(20), + getKeyInBytes(10000), DEFAULT_SLEEP).join().size()); + + // Test listing tables. 
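+ // getTablesList(name) filters the listing by table name, so the non-existent name yields an
+ // empty list while the table created above is found exactly once.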
+ assertEquals(0, asyncClient.getTablesList(NON_EXISTENT_TABLE) + .join(DEFAULT_SLEEP).getTablesList().size()); + assertEquals(1, asyncClient.getTablesList(tableWithDefault) + .join(DEFAULT_SLEEP).getTablesList().size()); + assertEquals(initialTableCount + 5, + asyncClient.getTablesList().join(DEFAULT_SLEEP).getTablesList().size()); + assertFalse(asyncClient.getTablesList(tableWithDefault) + .join(DEFAULT_SLEEP).getTablesList().isEmpty()); + + assertFalse(asyncClient.tableExists(NON_EXISTENT_TABLE).join(DEFAULT_SLEEP)); + assertTrue(asyncClient.tableExists(tableWithDefault).join(DEFAULT_SLEEP)); + } + + @Test(timeout = 100000) + @SuppressWarnings("deprecation") + public void testLocateTableNonCoveringRange() throws Exception { + client.createTable(tableName, basicSchema, getBasicTableOptionsWithNonCoveredRange()); + KuduTable table = client.openTable(tableName); + + List tablets; + + // all tablets + tablets = table.getTabletsLocations(null, null, 100000); + assertEquals(3, tablets.size()); + assertArrayEquals(getKeyInBytes(0), tablets.get(0).getPartition().getPartitionKeyStart()); + assertArrayEquals(getKeyInBytes(50), tablets.get(0).getPartition().getPartitionKeyEnd()); + assertArrayEquals(getKeyInBytes(50), tablets.get(1).getPartition().getPartitionKeyStart()); + assertArrayEquals(getKeyInBytes(100), tablets.get(1).getPartition().getPartitionKeyEnd()); + assertArrayEquals(getKeyInBytes(200), tablets.get(2).getPartition().getPartitionKeyStart()); + assertArrayEquals(getKeyInBytes(300), tablets.get(2).getPartition().getPartitionKeyEnd()); + + // key < 50 + tablets = table.getTabletsLocations(null, getKeyInBytes(50), 100000); + assertEquals(1, tablets.size()); + assertArrayEquals(getKeyInBytes(0), tablets.get(0).getPartition().getPartitionKeyStart()); + assertArrayEquals(getKeyInBytes(50), tablets.get(0).getPartition().getPartitionKeyEnd()); + + // key >= 300 + tablets = table.getTabletsLocations(getKeyInBytes(300), null, 100000); + assertEquals(0, tablets.size()); + + // key >= 299 + tablets = table.getTabletsLocations(getKeyInBytes(299), null, 100000); + assertEquals(1, tablets.size()); + assertArrayEquals(getKeyInBytes(200), tablets.get(0).getPartition().getPartitionKeyStart()); + assertArrayEquals(getKeyInBytes(300), tablets.get(0).getPartition().getPartitionKeyEnd()); + + // key >= 150 && key < 250 + tablets = table.getTabletsLocations(getKeyInBytes(150), getKeyInBytes(250), 100000); + assertEquals(1, tablets.size()); + assertArrayEquals(getKeyInBytes(200), tablets.get(0).getPartition().getPartitionKeyStart()); + assertArrayEquals(getKeyInBytes(300), tablets.get(0).getPartition().getPartitionKeyEnd()); + } + + public byte[] getKeyInBytes(int i) { + PartialRow row = BASIC_SCHEMA.newPartialRow(); + row.addInt(0, i); + return row.encodePrimaryKey(); + } + + @Test(timeout = 100000) + @SuppressWarnings("deprecation") + public void testAlterTableNonCoveringRange() throws Exception { + client.createTable(tableName, basicSchema, getBasicTableOptionsWithNonCoveredRange()); + final KuduTable table = client.openTable(tableName); + final KuduSession session = client.newSession(); + + AlterTableOptions ato = new AlterTableOptions(); + PartialRow lowerBound = BASIC_SCHEMA.newPartialRow(); + lowerBound.addInt("key", 300); + PartialRow upperBound = BASIC_SCHEMA.newPartialRow(); + upperBound.addInt("key", 400); + ato.addRangePartition(lowerBound, upperBound); + client.alterTable(tableName, ato); + + Insert insert = createBasicSchemaInsert(table, 301); + session.apply(insert); + + List tablets; + + // 
all tablets + tablets = table.getTabletsLocations(getKeyInBytes(300), null, 100000); + assertEquals(1, tablets.size()); + assertArrayEquals(getKeyInBytes(300), tablets.get(0).getPartition().getPartitionKeyStart()); + assertArrayEquals(getKeyInBytes(400), tablets.get(0).getPartition().getPartitionKeyEnd()); + + insert = createBasicSchemaInsert(table, 201); + session.apply(insert); + + ato = new AlterTableOptions(); + lowerBound = BASIC_SCHEMA.newPartialRow(); + lowerBound.addInt("key", 200); + upperBound = BASIC_SCHEMA.newPartialRow(); + upperBound.addInt("key", 300); + ato.dropRangePartition(lowerBound, upperBound); + client.alterTable(tableName, ato); + + insert = createBasicSchemaInsert(table, 202); + OperationResponse response = session.apply(insert); + assertTrue(response.hasRowError()); + assertTrue(response.getRowError().getErrorStatus().isNotFound()); + } + + @Test(timeout = 100000) + public void testFormatRangePartitions() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + List expected = Lists.newArrayList(); + + { + expected.add("VALUES < -300"); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, -300); + builder.addRangePartition(basicSchema.newPartialRow(), upper); + } + { + expected.add("-100 <= VALUES < 0"); + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, -100); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 0); + builder.addRangePartition(lower, upper); + } + { + expected.add("0 <= VALUES < 100"); + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, -1); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 99); + builder.addRangePartition(lower, upper, + RangePartitionBound.EXCLUSIVE_BOUND, + RangePartitionBound.INCLUSIVE_BOUND); + } + { + expected.add("VALUE = 300"); + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 300); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 300); + builder.addRangePartition(lower, upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.INCLUSIVE_BOUND); + } + { + expected.add("VALUES >= 400"); + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 400); + builder.addRangePartition(lower, basicSchema.newPartialRow()); + } + + client.createTable(tableName, basicSchema, builder); + assertEquals( + expected, + client.openTable(tableName).getFormattedRangePartitions(10000)); + } + + @Test(timeout = 100000) + public void testCreateTablePartitionWithEmptyCustomHashSchema() throws Exception { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, -100); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 100); + + CreateTableOptions builder = getBasicCreateTableOptions(); + + // Using an empty custom hash schema for the range. + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + builder.addRangePartition(rangePartition); + + KuduTable table = client.createTable(tableName, basicSchema, builder); + + // Check the result: retrieve the information on tablets from master + // and check if each partition has the expected parameters. + { + for (KuduScanToken token : new KuduScanToken.KuduScanTokenBuilder(asyncClient, table) + .setTimeout(client.getDefaultOperationTimeoutMs()).build()) { + Partition p = token.getTablet().getPartition(); + // No hash partitions are expected. 
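+ // With an empty custom hash schema the partition carries no hash buckets, and the single
+ // range partition should decode back to [-100, 100).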
+ assertEquals(0, p.getHashBuckets().size()); + } + + final List rangePartitions = + table.getRangePartitions(client.getDefaultOperationTimeoutMs()); + assertEquals(1, rangePartitions.size()); + final Partition p = rangePartitions.get(0); + + assertTrue(p.getRangeKeyStart().length > 0); + PartialRow rangeKeyStartDecoded = p.getDecodedRangeKeyStart(table); + assertEquals(-100, rangeKeyStartDecoded.getInt(0)); + assertTrue(p.getRangeKeyEnd().length > 0); + PartialRow rangeKeyEndDecoded = p.getDecodedRangeKeyEnd(table); + assertEquals(100, rangeKeyEndDecoded.getInt(0)); + } + + assertEquals( + ImmutableList.of("-100 <= VALUES < 100"), + client.openTable(tableName).getFormattedRangePartitions(10000)); + assertEquals( + ImmutableList.of("-100 <= VALUES < 100"), + client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000)); + } + + @Test(timeout = 100000) + public void testCreateTablePartitionWithCustomHashSchema() throws Exception { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, -100); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 200); + + // Simple custom hash schema for the range: two buckets on the column "key". + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 2, 0); + + CreateTableOptions builder = getBasicCreateTableOptions(); + builder.addRangePartition(rangePartition); + + // Add table-wide schema: it should have the same number of dimensions + // as the range-specific hash schema. However, this schema isn't used + // in this test scenario. + builder.addHashPartitions(ImmutableList.of("key"), 7, 0); + + KuduTable table = client.createTable(tableName, basicSchema, builder); + + assertEquals( + ImmutableList.of("-100 <= VALUES < 200"), + client.openTable(tableName).getFormattedRangePartitions(10000)); + assertEquals( + ImmutableList.of("-100 <= VALUES < 200 HASH(key) PARTITIONS 2"), + client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000)); + + // Check the result: retrieve the information on tablets from master + // and check if each partition has expected parameters. + { + Set buckets = new HashSet(); + for (KuduScanToken token : new KuduScanToken.KuduScanTokenBuilder(asyncClient, table) + .setTimeout(client.getDefaultOperationTimeoutMs()).build()) { + Partition p = token.getTablet().getPartition(); + // Two hash partitions are expected per range. 
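+ // Each tablet reports a single hash dimension (one bucket index); across the range both
+ // bucket indices 0 and 1 should be observed.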
+ assertEquals(1, p.getHashBuckets().size()); + for (Integer idx : p.getHashBuckets()) { + buckets.add(idx); + } + } + assertEquals(2, buckets.size()); + for (int i = 0; i < buckets.size(); ++i) { + assertTrue(String.format("must have bucket %d", i), buckets.contains(i)); + } + + final List rangePartitions = + table.getRangePartitions(client.getDefaultOperationTimeoutMs()); + assertEquals(1, rangePartitions.size()); + final Partition p = rangePartitions.get(0); + + assertTrue(p.getRangeKeyStart().length > 0); + PartialRow rangeKeyStartDecoded = p.getDecodedRangeKeyStart(table); + assertEquals(-100, rangeKeyStartDecoded.getInt(0)); + assertTrue(p.getRangeKeyEnd().length > 0); + PartialRow rangeKeyEndDecoded = p.getDecodedRangeKeyEnd(table); + assertEquals(200, rangeKeyEndDecoded.getInt(0)); + } + } + + @Test(timeout = 100000) + public void testRangePartitionWithCustomHashSchemaBasic() throws Exception { + final int valLower = 10; + final int valUpper = 20; + + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, valLower); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, valUpper); + + // Simple custom hash schema for the range: five buckets on the column "key". + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 5, 0); + + CreateTableOptions builder = getBasicCreateTableOptions(); + builder.addRangePartition(rangePartition); + // Add table-wide schema: it should have the same number of dimensions + // as the range-specific hash schema. However, this schema isn't used + // in this test scenario. + builder.addHashPartitions(ImmutableList.of("key"), 32, 0); + + final KuduTable table = client.createTable(tableName, basicSchema, builder); + final PartitionSchema ps = table.getPartitionSchema(); + assertTrue(ps.hasCustomHashSchemas()); + assertFalse(ps.isSimpleRangePartitioning()); + + // NOTE: use schema from server since ColumnIDs are needed for row encoding + final PartialRow rowLower = table.getSchema().newPartialRow(); + rowLower.addInt(0, valLower); + + final PartialRow rowUpper = table.getSchema().newPartialRow(); + rowUpper.addInt(0, valUpper); + + { + final List s = ps.getHashSchemaForRange( + KeyEncoder.encodeRangePartitionKey(rowLower, ps.getRangeSchema())); + // There should be just one dimension with five buckets. + assertEquals(1, s.size()); + assertEquals(5, s.get(0).getNumBuckets()); + } + { + // There should be 5 partitions: the newly created table has a single + // range with 5 hash buckets, but KuduTable.getRangePartitions() removes + // the 'duplicates' with hash code other than 0. So, the result should be + // just one partition with hash code 0. + List partitions = table.getRangePartitions(50000); + assertEquals(1, partitions.size()); + List buckets = partitions.get(0).getHashBuckets(); + assertEquals(1, buckets.size()); // there is just one hash dimension + assertEquals(0, buckets.get(0).intValue()); + } + { + final byte[] rowLowerEnc = ps.encodePartitionKey(rowLower); + final byte[] rowUpperEnc = ps.encodePartitionKey(rowUpper); + + // The range part comes after the hash part in an encoded partition key. + // The hash part contains 4 * number_of_hash_dimensions bytes. 
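+ // With a single hash dimension the hash prefix is 4 bytes, so the range-encoded portion
+ // starts at offset 4 of the encoded key.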
+ byte[] hashLower = Arrays.copyOfRange(rowLowerEnc, 4, rowLowerEnc.length); + byte[] hashUpper = Arrays.copyOfRange(rowUpperEnc, 4, rowUpperEnc.length); + + Set buckets = new HashSet(); + for (KuduScanToken token : new KuduScanToken.KuduScanTokenBuilder(asyncClient, table) + .setTimeout(client.getDefaultOperationTimeoutMs()).build()) { + final Partition p = token.getTablet().getPartition(); + assertEquals(0, Bytes.memcmp(p.getRangeKeyStart(), hashLower)); + assertEquals(0, Bytes.memcmp(p.getRangeKeyEnd(), hashUpper)); + assertEquals(1, p.getHashBuckets().size()); + buckets.add(p.getHashBuckets().get(0)); + } + + // Check the generated scan tokens cover all the tablets for the range: + // all hash bucket indices should be present. + assertEquals(5, buckets.size()); + for (int i = 0; i < buckets.size(); ++i) { + assertTrue(String.format("must have bucket %d", i), buckets.contains(i)); + } + } + } + + @Test(timeout = 100000) + public void testCreateTableCustomHashSchemasTwoRanges() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 0); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 100); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 2, 0); + builder.addRangePartition(rangePartition); + } + + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 100); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 200); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 3, 0); + builder.addRangePartition(rangePartition); + } + + // Add table-wide schema as well -- that's to satisfy the constraint on + // the number of hash dimensions in table's hash schemas. However, this + // scenario isn't going to create a range with table-wide hash schema. + builder.addHashPartitions(ImmutableList.of("key"), 5, 0); + + KuduTable table = client.createTable(tableName, basicSchema, builder); + + // Check the result: retrieve the information on tablets from master + // and check if each partition has expected parameters. + List tablets = table.getTabletsLocations(10000); + // There should be 5 tablets: 2 for [0, 100) range and 3 for [100, 200). + assertEquals(5, tablets.size()); + + assertEquals( + ImmutableList.of("0 <= VALUES < 100", "100 <= VALUES < 200"), + client.openTable(tableName).getFormattedRangePartitions(10000)); + assertEquals( + ImmutableList.of( + "0 <= VALUES < 100 HASH(key) PARTITIONS 2", + "100 <= VALUES < 200 HASH(key) PARTITIONS 3"), + client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000)); + + // Insert data into the newly created table and read it back. + KuduSession session = client.newSession(); + for (int key = 0; key < 200; ++key) { + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + } + session.flush(); + + // Do full table scan. 
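+ // 200 rows were written across the two ranges ([0, 100) and [100, 200)), so an unfiltered
+ // scan should return all of them before the predicate scans below narrow the results.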
+ List rowStrings = scanTableToStrings(table); + assertEquals(200, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, -1)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 0)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 1)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 99)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 100)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 101)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 199)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 200)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 201)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER, 0)); + assertEquals(199, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER, 100)); + assertEquals(99, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER, 199)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER, 200)); + assertEquals(0, rowStrings.size()); + + // Predicate to have all rows in the range with table-wide hash schema. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 100)); + assertEquals(100, rowStrings.size()); + + // Predicate to have all rows in the range with custom hash schema. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 100), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 200)); + assertEquals(100, rowStrings.size()); + + // Predicate to have one part of the rows in the range with table-wide hash + // schema, and the other part from the range with custom hash schema. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 50), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 150)); + assertEquals(100, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 150)); + assertEquals(150, rowStrings.size()); + + // Predicates to almost cover the both ranges (sort of off-by-one situation). 
+ rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 1), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 199)); + assertEquals(198, rowStrings.size()); + + // Predicates to almost cover the both ranges (sort of off-by-one situation). + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 1), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 200)); + assertEquals(199, rowStrings.size()); + + // Predicates to almost cover the both ranges (sort of off-by-one situation). + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 199)); + assertEquals(199, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 199)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS_EQUAL, 0)); + assertEquals(1, rowStrings.size()); + + // Predicate to cover exactly both ranges. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 200)); + assertEquals(200, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 200)); + assertEquals(200, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0)); + assertEquals(200, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 200)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 0)); + assertEquals(0, rowStrings.size()); + } + + @Test(timeout = 100000) + public void testCreateTableCustomHashSchemasTwoUnboundedRanges() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + + { + PartialRow lower = basicSchema.newPartialRow(); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 0); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 2, 0); + builder.addRangePartition(rangePartition); + } + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 0); + PartialRow upper = basicSchema.newPartialRow(); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 3, 0); + builder.addRangePartition(rangePartition); + } + + // Add table-wide schema as well -- that's to satisfy the constraint on + // the number of hash dimensions in table's hash schemas. However, this + // scenario isn't going to create a range with table-wide hash schema. 
+ builder.addHashPartitions(ImmutableList.of("key"), 5, 0); + + KuduTable table = client.createTable(tableName, basicSchema, builder); + + // There should be 5 tablets: 2 for [-inf, 0) range and 3 for [0, +inf). + List tablets = table.getTabletsLocations(10000); + assertEquals(5, tablets.size()); + + assertEquals( + ImmutableList.of("VALUES < 0", "VALUES >= 0"), + client.openTable(tableName).getFormattedRangePartitions(10000)); + assertEquals( + ImmutableList.of( + "VALUES < 0 HASH(key) PARTITIONS 2", + "VALUES >= 0 HASH(key) PARTITIONS 3"), + client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000)); + + // Insert data into the newly created table and read it back. + KuduSession session = client.newSession(); + for (int key = -250; key < 250; ++key) { + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + } + session.flush(); + + // Do full table scan. + List rowStrings = scanTableToStrings(table); + assertEquals(500, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0)); + assertEquals(250, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 0)); + assertEquals(250, rowStrings.size()); + + // Predicate to have one part of the rows in the range with table-wide hash + // schema, and the other part from the range with custom hash schema. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, -150), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 150)); + assertEquals(300, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, -250)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 250)); + assertEquals(0, rowStrings.size()); + } + + @Test(timeout = 100000) + public void testCreateTableCustomHashSchemasTwoMixedRanges() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 0); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 100); + + // Simple custom hash schema for the range: two buckets on the column "key". + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 2, 0); + builder.addRangePartition(rangePartition); + } + + // Add table-wide schema as well. + builder.addHashPartitions(ImmutableList.of("key"), 5, 0); + + // Add a range to have the table-wide hash schema. + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 100); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 200); + builder.addRangePartition(lower, upper); + } + + KuduTable table = client.createTable(tableName, basicSchema, builder); + + // Check the result: retrieve the information on tablets from master + // and check if each partition has expected parameters. + List tablets = table.getTabletsLocations(10000); + // There should be 7 tablets: 2 for the [0, 100) range and 5 for [100, 200). 
+ assertEquals(7, tablets.size()); + + assertEquals( + ImmutableList.of("0 <= VALUES < 100", "100 <= VALUES < 200"), + client.openTable(tableName).getFormattedRangePartitions(10000)); + assertEquals( + ImmutableList.of( + "0 <= VALUES < 100 HASH(key) PARTITIONS 2", + "100 <= VALUES < 200 HASH(key) PARTITIONS 5"), + client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000)); + + // Insert data into the newly created table and read it back. + KuduSession session = client.newSession(); + for (int key = 0; key < 200; ++key) { + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + } + session.flush(); + + // Do full table scan. + List rowStrings = scanTableToStrings(table); + assertEquals(200, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, -1)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 0)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 1)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 99)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 100)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 101)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 199)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 200)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), EQUAL, 201)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER, 0)); + assertEquals(199, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER, 100)); + assertEquals(99, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER, 199)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER, 200)); + assertEquals(0, rowStrings.size()); + + // Predicate to have all rows in the range with table-wide hash schema. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 100)); + assertEquals(100, rowStrings.size()); + + // Predicate to have all rows in the range with custom hash schema. 
+ rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 100), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 200)); + assertEquals(100, rowStrings.size()); + + // Predicate to have one part of the rows in the range with table-wide hash + // schema, and the other part from the range with custom hash schema. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 50), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 150)); + assertEquals(100, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 150)); + assertEquals(150, rowStrings.size()); + + // Predicates to almost cover the both ranges (sort of off-by-one situation). + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 1), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 199)); + assertEquals(198, rowStrings.size()); + + // Predicates to almost cover the both ranges (sort of off-by-one situation). + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 1), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 200)); + assertEquals(199, rowStrings.size()); + + // Predicates to almost cover the both ranges (sort of off-by-one situation). + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 199)); + assertEquals(199, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 199)); + assertEquals(1, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS_EQUAL, 0)); + assertEquals(1, rowStrings.size()); + + // Predicate to cover exactly both ranges. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 200)); + assertEquals(200, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 200)); + assertEquals(200, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 0)); + assertEquals(200, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), GREATER_EQUAL, 200)); + assertEquals(0, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(basicSchema.getColumn("key"), LESS, 0)); + assertEquals(0, rowStrings.size()); + } + + @Test(timeout = 100000) + public void testCreateTableCustomHashSchemaDifferentDimensions() throws Exception { + // Have the table-wide hash schema different from custom hash schema per + // various ranges: it should not be possible to create a table. 
+    ArrayList<ColumnSchema> columns = new ArrayList<>(3);
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c0i", Type.INT32).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c1i", Type.INT64).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c2s", Type.STRING).key(true).build());
+    final Schema schema = new Schema(columns);
+
+    CreateTableOptions builder = new CreateTableOptions().setRangePartitionColumns(
+        ImmutableList.of("c0i"));
+
+    // Add table-wide schema with two hash dimensions.
+    builder.addHashPartitions(ImmutableList.of("c1i"), 7, 0);
+    builder.addHashPartitions(ImmutableList.of("c2s"), 3, 0);
+
+    // Custom hash schema for the range: a single hash dimension with
+    // two buckets on the column "c0i".
+    {
+      PartialRow lower = schema.newPartialRow();
+      lower.addInt(0, -100);
+      PartialRow upper = schema.newPartialRow();
+      upper.addInt(0, 200);
+
+      RangePartitionWithCustomHashSchema rangePartition =
+          new RangePartitionWithCustomHashSchema(
+              lower,
+              upper,
+              RangePartitionBound.INCLUSIVE_BOUND,
+              RangePartitionBound.EXCLUSIVE_BOUND);
+      rangePartition.addHashPartitions(ImmutableList.of("c0i"), 2, 0);
+      builder.addRangePartition(rangePartition);
+    }
+
+    try {
+      client.createTable(tableName, schema, builder);
+      fail("shouldn't be able to create a table with hash schemas varying in " +
+          "number of hash dimensions across table partitions");
+    } catch (KuduException ex) {
+      assertTrue(ex.getStatus().isNotSupported());
+      final String errmsg = ex.getMessage();
+      assertTrue(errmsg, errmsg.matches(
+          "varying number of hash dimensions per range is not yet supported"));
+    }
+
+    // OK, now try a mixed case: one range with hash schema matching the number
+    // of hash dimensions of the table-wide hash schema, and a few more ranges
+    // with a different number of hash dimensions in their hash schema.
+    // Custom hash schema for this range: two buckets on "c0i" and three
+    // buckets on "c1i".
+    {
+      PartialRow lower = schema.newPartialRow();
+      lower.addInt(0, 200);
+      PartialRow upper = schema.newPartialRow();
+      upper.addInt(0, 300);
+
+      RangePartitionWithCustomHashSchema rangePartition =
+          new RangePartitionWithCustomHashSchema(
+              lower,
+              upper,
+              RangePartitionBound.INCLUSIVE_BOUND,
+              RangePartitionBound.EXCLUSIVE_BOUND);
+      rangePartition.addHashPartitions(ImmutableList.of("c0i"), 2, 0);
+      rangePartition.addHashPartitions(ImmutableList.of("c1i"), 3, 0);
+      builder.addRangePartition(rangePartition);
+    }
+
+    try {
+      client.createTable(tableName, schema, builder);
+      fail("shouldn't be able to create a table with hash schemas varying in " +
+          "number of hash dimensions across table partitions");
+    } catch (KuduException ex) {
+      assertTrue(ex.getStatus().isNotSupported());
+      final String errmsg = ex.getMessage();
+      assertTrue(errmsg, errmsg.matches(
+          "varying number of hash dimensions per range is not yet supported"));
+    }
+  }
+
+  @Test(timeout = 100000)
+  public void testGetHashSchemaForRange() throws Exception {
+    final int valLower = 100;
+    final int valUpper = 200;
+
+    PartialRow lower = basicSchema.newPartialRow();
+    lower.addInt(0, valLower);
+    PartialRow upper = basicSchema.newPartialRow();
+    upper.addInt(0, valUpper);
+
+    // Custom hash schema for the range: three buckets on the column "key".
+    RangePartitionWithCustomHashSchema rangePartition =
+        new RangePartitionWithCustomHashSchema(
+            lower,
+            upper,
+            RangePartitionBound.INCLUSIVE_BOUND,
+            RangePartitionBound.EXCLUSIVE_BOUND);
+    rangePartition.addHashPartitions(ImmutableList.of("key"), 3, 0);
+
+    CreateTableOptions builder = getBasicCreateTableOptions();
+    builder.addRangePartition(rangePartition);
+
+    // Add table-wide schema with one dimension and five buckets.
+    builder.addHashPartitions(ImmutableList.of("key"), 5, 0);
+
+    final KuduTable table = client.createTable(tableName, basicSchema, builder);
+    final PartitionSchema ps = table.getPartitionSchema();
+
+    // Should get the table-wide schema as the result when asking for a point
+    // in a non-covered range.
+    {
+      PartialRow row = table.getSchema().newPartialRow();
+      row.addInt(0, 99);
+
+      final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+          KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema()));
+      assertEquals(1, s.size());
+      assertEquals(5, s.get(0).getNumBuckets());
+    }
+
+    // The exact range boundary: should get the custom hash schema.
+    {
+      PartialRow row = table.getSchema().newPartialRow();
+      row.addInt(0, 100);
+      final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+          KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema()));
+      assertEquals(1, s.size());
+      assertEquals(3, s.get(0).getNumBuckets());
+    }
+
+    // A value within the range: should get the custom hash schema.
+    {
+      PartialRow row = table.getSchema().newPartialRow();
+      row.addInt(0, 101);
+      final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+          KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema()));
+      assertEquals(1, s.size());
+      assertEquals(3, s.get(0).getNumBuckets());
+    }
+
+    // Should get the table-wide schema as the result when asking for the
+    // upper exclusive boundary.
+    {
+      PartialRow row = table.getSchema().newPartialRow();
+      row.addInt(0, 200);
+
+      final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+          KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema()));
+      assertEquals(1, s.size());
+      assertEquals(5, s.get(0).getNumBuckets());
+    }
+
+    // Should get the table-wide schema as the result when asking for a point
+    // in a non-covered range.
+    {
+      PartialRow row = table.getSchema().newPartialRow();
+      row.addInt(0, 300);
+
+      final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+          KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema()));
+      // There should be just one dimension with five buckets.
+      assertEquals(1, s.size());
+      assertEquals(5, s.get(0).getNumBuckets());
+    }
+  }
+
+  @Test(timeout = 100000)
+  public void testGetHashSchemaForRangeUnbounded() throws Exception {
+    // The test table is created with the following ranges:
+    // (-inf, -100) [-100, 0) [0, 100), [100, +inf)
+
+    CreateTableOptions builder = getBasicCreateTableOptions();
+    // Add table-wide schema with one dimension and two buckets.
+ builder.addHashPartitions(ImmutableList.of("key"), 2, 0); + + // Add range partition with custom hash schema: (-inf, -100) + { + PartialRow lower = basicSchema.newPartialRow(); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, -100); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 3, 0); + + builder.addRangePartition(rangePartition); + } + + // Add range partition with table-wide hash schema: [-100, 0) + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, -100); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 0); + + builder.addRangePartition(lower, upper); + } + + // Add range partition with custom hash schema: [0, 100) + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 0); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 100); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 5, 0); + + builder.addRangePartition(rangePartition); + } + + // Add range partition with table-wide hash schema: [100, +inf) + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 100); + PartialRow upper = basicSchema.newPartialRow(); + + builder.addRangePartition(lower, upper); + } + + final KuduTable table = client.createTable(tableName, basicSchema, builder); + final PartitionSchema ps = table.getPartitionSchema(); + + { + PartialRow row = table.getSchema().newPartialRow(); + row.addInt(0, -2002); + final List s = ps.getHashSchemaForRange( + KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema())); + assertEquals(1, s.size()); + assertEquals(3, s.get(0).getNumBuckets()); + } + + { + PartialRow row = table.getSchema().newPartialRow(); + row.addInt(0, -101); + final List s = ps.getHashSchemaForRange( + KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema())); + assertEquals(1, s.size()); + assertEquals(3, s.get(0).getNumBuckets()); + } + + { + PartialRow row = table.getSchema().newPartialRow(); + row.addInt(0, -100); + final List s = ps.getHashSchemaForRange( + KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema())); + assertEquals(1, s.size()); + assertEquals(2, s.get(0).getNumBuckets()); + } + + { + PartialRow row = table.getSchema().newPartialRow(); + row.addInt(0, 0); + final List s = ps.getHashSchemaForRange( + KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema())); + assertEquals(1, s.size()); + assertEquals(5, s.get(0).getNumBuckets()); + } + + { + PartialRow row = table.getSchema().newPartialRow(); + row.addInt(0, 99); + final List s = ps.getHashSchemaForRange( + KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema())); + assertEquals(1, s.size()); + assertEquals(5, s.get(0).getNumBuckets()); + } + + { + PartialRow row = table.getSchema().newPartialRow(); + row.addInt(0, 100); + final List s = ps.getHashSchemaForRange( + KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema())); + assertEquals(1, s.size()); + assertEquals(2, s.get(0).getNumBuckets()); + } + + { + PartialRow row = table.getSchema().newPartialRow(); + row.addInt(0, 1001); + final List s = ps.getHashSchemaForRange( + KeyEncoder.encodeRangePartitionKey(row, ps.getRangeSchema())); + 
assertEquals(1, s.size()); + assertEquals(2, s.get(0).getNumBuckets()); + } + } + + @Test(timeout = 100000) + public void testFormatRangePartitionsCompoundColumns() throws Exception { + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("a", Type.STRING).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("b", Type.INT8).key(true).build()); + Schema schema = new Schema(columns); + + CreateTableOptions builder = new CreateTableOptions(); + builder.addHashPartitions(ImmutableList.of("a"), 2); + builder.addHashPartitions(ImmutableList.of("b"), 2); + builder.setRangePartitionColumns(ImmutableList.of("a", "b")); + List expected = Lists.newArrayList(); + + { + expected.add("VALUES < (\"\", -100)"); + PartialRow upper = schema.newPartialRow(); + upper.addString(0, ""); + upper.addByte(1, (byte) -100); + builder.addRangePartition(schema.newPartialRow(), upper); + } + { + expected.add("VALUE = (\"abc\", 0)"); + PartialRow lower = schema.newPartialRow(); + lower.addString(0, "abc"); + lower.addByte(1, (byte) 0); + PartialRow upper = schema.newPartialRow(); + upper.addString(0, "abc"); + upper.addByte(1, (byte) 1); + builder.addRangePartition(lower, upper); + } + { + expected.add("(\"def\", 0) <= VALUES < (\"ghi\", 100)"); + PartialRow lower = schema.newPartialRow(); + lower.addString(0, "def"); + lower.addByte(1, (byte) -1); + PartialRow upper = schema.newPartialRow(); + upper.addString(0, "ghi"); + upper.addByte(1, (byte) 99); + builder.addRangePartition(lower, upper, + RangePartitionBound.EXCLUSIVE_BOUND, + RangePartitionBound.INCLUSIVE_BOUND); + } + + client.createTable(tableName, schema, builder); + assertEquals( + expected, + client.openTable(tableName).getFormattedRangePartitions(10000)); + } + + @Test(timeout = 100000) + public void testFormatRangePartitionsStringColumn() throws Exception { + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("a", Type.STRING).key(true).build()); + Schema schema = new Schema(columns); + + CreateTableOptions builder = new CreateTableOptions(); + builder.setRangePartitionColumns(ImmutableList.of("a")); + List expected = Lists.newArrayList(); + + { + expected.add("VALUES < \"\\0\""); + PartialRow upper = schema.newPartialRow(); + upper.addString(0, "\0"); + builder.addRangePartition(schema.newPartialRow(), upper); + } + { + expected.add("VALUE = \"abc\""); + PartialRow lower = schema.newPartialRow(); + lower.addString(0, "abc"); + PartialRow upper = schema.newPartialRow(); + upper.addString(0, "abc\0"); + builder.addRangePartition(lower, upper); + } + { + expected.add("\"def\" <= VALUES < \"ghi\""); + PartialRow lower = schema.newPartialRow(); + lower.addString(0, "def"); + PartialRow upper = schema.newPartialRow(); + upper.addString(0, "ghi"); + builder.addRangePartition(lower, upper); + } + { + expected.add("VALUES >= \"z\""); + PartialRow lower = schema.newPartialRow(); + lower.addString(0, "z"); + builder.addRangePartition(lower, schema.newPartialRow()); + } + + client.createTable(tableName, schema, builder); + assertEquals( + expected, + client.openTable(tableName).getFormattedRangePartitions(10000)); + } + + @Test(timeout = 100000) + public void testFormatRangePartitionsUnbounded() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + client.createTable(tableName, basicSchema, builder); + + assertEquals( + ImmutableList.of("UNBOUNDED"), + client.openTable(tableName).getFormattedRangePartitions(10000)); + } + + 
@SuppressWarnings("deprecation") + private KuduTable createTableWithSplitsAndTest(String tableNamePrefix, int splitsCount) + throws Exception { + String newTableName = tableNamePrefix + "-" + splitsCount; + CreateTableOptions builder = getBasicCreateTableOptions(); + + if (splitsCount != 0) { + for (int i = 1; i <= splitsCount; i++) { + PartialRow row = BASIC_SCHEMA.newPartialRow(); + row.addInt(0, i); + builder.addSplitRow(row); + } + } + KuduTable table = client.createTable(newTableName, BASIC_SCHEMA, builder); + + List tablets = table.getTabletsLocations(DEFAULT_SLEEP); + assertEquals(splitsCount + 1, tablets.size()); + assertEquals(splitsCount + 1, table.asyncGetTabletsLocations(DEFAULT_SLEEP).join().size()); + for (LocatedTablet tablet : tablets) { + assertEquals(3, tablet.getReplicas().size()); + } + return table; + } + + @Test(timeout = 100000) + public void testGetRangePartitions() throws Exception { + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("a", Type.STRING).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("b", Type.INT8).key(true).build()); + Schema schema = new Schema(columns); + + CreateTableOptions builder = new CreateTableOptions(); + builder.addHashPartitions(ImmutableList.of("a"), 2); + builder.addHashPartitions(ImmutableList.of("b"), 2); + builder.setRangePartitionColumns(ImmutableList.of("a", "b")); + + PartialRow bottom = schema.newPartialRow(); + PartialRow middle = schema.newPartialRow(); + middle.addString("a", ""); + middle.addByte("b", (byte) -100); + PartialRow upper = schema.newPartialRow(); + + builder.addRangePartition(bottom, middle); + builder.addRangePartition(middle, upper); + + KuduTable table = client.createTable(tableName, schema, builder); + + List rangePartitions = + table.getRangePartitions(client.getDefaultOperationTimeoutMs()); + assertEquals(rangePartitions.size(), 2); + + Partition lowerPartition = rangePartitions.get(0); + assertEquals(0, lowerPartition.getRangeKeyStart().length); + assertTrue(lowerPartition.getRangeKeyEnd().length > 0); + PartialRow decodedLower = lowerPartition.getDecodedRangeKeyEnd(table); + assertEquals("", decodedLower.getString("a")); + assertEquals((byte) -100, decodedLower.getByte("b")); + + Partition upperPartition = rangePartitions.get(1); + assertTrue(upperPartition.getRangeKeyStart().length > 0); + assertEquals(0, upperPartition.getRangeKeyEnd().length); + PartialRow decodedUpper = upperPartition.getDecodedRangeKeyStart(table); + assertEquals("", decodedUpper.getString("a")); + assertEquals((byte) -100, decodedUpper.getByte("b")); + } + + @Test(timeout = 100000) + public void testGetRangePartitionsUnbounded() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + KuduTable table = client.createTable(tableName, BASIC_SCHEMA, builder); + + List rangePartitions = + table.getRangePartitions(client.getDefaultOperationTimeoutMs()); + assertEquals(rangePartitions.size(), 1); + Partition partition = rangePartitions.get(0); + assertEquals(0, partition.getRangeKeyStart().length); + assertEquals(0, partition.getRangeKeyEnd().length); + } + + @Test(timeout = 100000) + public void testGetRangePartitionsWithTableHashSchema() throws Exception { + // The test table is created with the following ranges: + // (-inf, -100) [-100, 0) [0, 100), [100, +inf) + + CreateTableOptions builder = getBasicCreateTableOptions(); + // Add table-wide schema with one dimensions and two buckets. 
+ builder.addHashPartitions(ImmutableList.of("key"), 2, 0); + + // Add range partition with custom hash schema: (-inf, -100) + { + PartialRow lower = basicSchema.newPartialRow(); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, -100); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 2, 1); + + builder.addRangePartition(rangePartition); + } + + // Add range partition with table-wide hash schema: [-100, 0) + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, -100); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 0); + + builder.addRangePartition(lower, upper); + } + + // Add range partition with custom hash schema: [0, 100) + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 0); + PartialRow upper = basicSchema.newPartialRow(); + upper.addInt(0, 100); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("key"), 5, 0); + + builder.addRangePartition(rangePartition); + } + + // Add range partition with table-wide hash schema: [100, +inf) + { + PartialRow lower = basicSchema.newPartialRow(); + lower.addInt(0, 100); + PartialRow upper = basicSchema.newPartialRow(); + + builder.addRangePartition(lower, upper); + } + + final KuduTable table = client.createTable(tableName, basicSchema, builder); + List rangePartitions = + table.getRangePartitionsWithTableHashSchema(client.getDefaultOperationTimeoutMs()); + assertEquals(rangePartitions.size(), 2); + + Partition lowerPartition = rangePartitions.get(0); + assertTrue(lowerPartition.getRangeKeyStart().length > 0); + assertTrue(lowerPartition.getRangeKeyEnd().length > 0); + PartialRow decodedLower = lowerPartition.getDecodedRangeKeyStart(table); + assertEquals(-100, decodedLower.getInt("key")); + PartialRow decodedUpper = lowerPartition.getDecodedRangeKeyEnd(table); + assertEquals(0, decodedUpper.getInt("key")); + + Partition upperPartition = rangePartitions.get(1); + assertTrue(upperPartition.getRangeKeyStart().length > 0); + assertEquals(0, upperPartition.getRangeKeyEnd().length); + PartialRow decodedLowerKey = upperPartition.getDecodedRangeKeyStart(table); + assertEquals(100, decodedLowerKey.getInt("key")); + } + + @Test(timeout = 100000) + public void testAlterNoWait() throws Exception { + client.createTable(tableName, basicSchema, getBasicCreateTableOptions()); + + String oldName = "column2_i"; + for (int i = 0; i < 10; i++) { + String newName = String.format("foo%d", i); + client.alterTable(tableName, new AlterTableOptions() + .renameColumn(oldName, newName) + .setWait(false)); + + // We didn't wait for the column rename to finish, so we should be able + // to still see 'oldName' and not yet see 'newName'. However, this is + // timing dependent: if the alter finishes before we reload the schema, + // loop and try again. 
+ KuduTable table = client.openTable(tableName); + try { + table.getSchema().getColumn(oldName); + } catch (IllegalArgumentException e) { + LOG.info("Alter finished too quickly (old column name {} is already " + + "gone), trying again", oldName); + oldName = newName; + continue; + } + try { + table.getSchema().getColumn(newName); + fail(String.format("New column name %s should not yet be visible", newName)); + } catch (IllegalArgumentException e) { + // ignored + } + + // After waiting for the alter to finish and reloading the schema, + // 'newName' should be visible and 'oldName' should be gone. + assertTrue(client.isAlterTableDone(tableName)); + table = client.openTable(tableName); + try { + table.getSchema().getColumn(oldName); + fail(String.format("Old column name %s should not be visible", oldName)); + } catch (IllegalArgumentException e) { + // ignored + } + table.getSchema().getColumn(newName); + LOG.info("Test passed on attempt {}", i + 1); + return; + } + fail("Could not run test even after multiple attempts"); + } + + @Test(timeout = 100000) + public void testNumReplicas() throws Exception { + for (int i = 1; i <= 3; i++) { + // Ignore even numbers. + if (i % 2 != 0) { + String tableName = "testNumReplicas" + "-" + i; + CreateTableOptions options = getBasicCreateTableOptions(); + options.setNumReplicas(i); + client.createTable(tableName, basicSchema, options); + KuduTable table = client.openTable(tableName); + assertEquals(i, table.getNumReplicas()); + } + } + } + + @Test(timeout = 100000) + public void testAlterColumnComment() throws Exception { + // Schema with comments. + List columns = ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32) + .key(true).comment("keytest").build(), + new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING) + .comment("valuetest").build()); + // Create a table. + KuduTable table = client.createTable(tableName, + new Schema(columns), getBasicCreateTableOptions()); + + // Verify the comments after creating. + assertEquals("wrong key comment", "keytest", + table.getSchema().getColumn("key").getComment()); + assertEquals("wrong value comment", "valuetest", + table.getSchema().getColumn("value").getComment()); + + // Change the comments. + client.alterTable(tableName, + new AlterTableOptions().changeComment("key", "keycomment")); + client.alterTable(tableName, + new AlterTableOptions().changeComment("value", "valuecomment")); + + // Verify the comments after the first change. + KuduTable table1 = client.openTable(tableName); + assertEquals("wrong key comment post alter", + "keycomment", table1.getSchema().getColumn("key").getComment()); + assertEquals("wrong value comment post alter", + "valuecomment", table1.getSchema().getColumn("value").getComment()); + + // Delete the comments. + client.alterTable(tableName, + new AlterTableOptions().changeComment("key", "")); + client.alterTable(tableName, + new AlterTableOptions().changeComment("value", "")); + + // Verify the comments after the second change. 
+ KuduTable table2 = client.openTable(tableName); + assertEquals("wrong key comment post alter", "", + table2.getSchema().getColumn("key").getComment()); + assertEquals("wrong value comment post alter", "", + table2.getSchema().getColumn("value").getComment()); + } + + @Test(timeout = 100000) + public void testAlterTableAddRangePartitionCustomHashSchemaOverlapped() throws Exception { + final List columns = ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).build()); + final Schema schema = new Schema(columns); + + CreateTableOptions options = getBasicCreateTableOptions(); + // Add table-wide schema for the table. + options.addHashPartitions(ImmutableList.of("key"), 2, 0); + + client.createTable(tableName, schema, options); + + // Originally, there are no range partitions in the newly created table. + assertEquals( + ImmutableList.of("UNBOUNDED"), + client.openTable(tableName).getFormattedRangePartitions(10000)); + + PartialRow lower = schema.newPartialRow(); + lower.addInt(0, -1); + PartialRow upper = schema.newPartialRow(); + upper.addInt(0, 1); + + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("key"), 3, 0); + + try { + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + fail("should not be able to add a partition which overlaps with existing unbounded one"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isInvalidArgument()); + assertTrue(errmsg, errmsg.matches(".*new range partition conflicts with existing one:.*")); + } + } + + @Test(timeout = 100000) + public void testAlterTableAddRangeCustomHashSchemaWrongBucketsNumber() throws Exception { + final List columns = ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).build()); + final Schema schema = new Schema(columns); + + CreateTableOptions options = getBasicCreateTableOptions(); + // Add table-wide schema for the table. + options.addHashPartitions(ImmutableList.of("key"), 2, 0); + // Add a range partition with table-wide hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addInt(0, -100); + PartialRow upper = schema.newPartialRow(); + upper.addInt(0, 0); + options.addRangePartition(lower, upper); + } + + client.createTable(tableName, schema, options); + + PartialRow lower = schema.newPartialRow(); + lower.addInt(0, 0); + PartialRow upper = schema.newPartialRow(); + upper.addInt(0, 100); + + // Try to add range with a single hash bucket -- it should not be possible. 
+ for (int hashBucketNum = -1; hashBucketNum < 2; ++hashBucketNum) { + try { + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("key"), hashBucketNum, 0); + + client.alterTable(tableName, new AlterTableOptions().addRangePartition(range)); + fail(String.format("should not be able to add a partition with " + + "invalid range-specific hash schema of %d hash buckets", hashBucketNum)); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isInvalidArgument()); + assertTrue(String.format("%d hash buckets: %s", hashBucketNum, errmsg), + errmsg.matches("must have at least two hash buckets")); + } + } + } + + @Test(timeout = 100000) + @KuduTestHarness.MasterServerConfig(flags = { + "--enable_per_range_hash_schemas=false", + }) + public void testTryCreateTableRangeWithCustomHashSchema() throws Exception { + final List columns = ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).build()); + final Schema schema = new Schema(columns); + + CreateTableOptions options = getBasicCreateTableOptions(); + // Define the table-wide schema. + options.addHashPartitions(ImmutableList.of("key"), 2, 0); + + PartialRow lower = schema.newPartialRow(); + lower.addInt(0, -1); + PartialRow upper = schema.newPartialRow(); + upper.addInt(0, 1); + + RangePartitionWithCustomHashSchema range = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + range.addHashPartitions(ImmutableList.of("key"), 3, 0); + options.addRangePartition(range); + + try { + client.createTable(tableName, schema, options); + fail("shouldn't be able to create a table with range-specific hash schema " + + "when server side doesn't support required RANGE_SPECIFIC_HASH_SCHEMA feature"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isRemoteError()); + assertTrue(errmsg, errmsg.matches( + ".* server sent error unsupported feature flags")); + } + } + + @Test(timeout = 100000) + public void testAlterTableAddRangePartitionCustomHashSchema() throws Exception { + final List columns = ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).nullable(true).build()); + final Schema schema = new Schema(columns); + + CreateTableOptions builder = getBasicCreateTableOptions(); + // Add table-wide schema for the table. + builder.addHashPartitions(ImmutableList.of("key"), 2, 0); + + // Add a range partition with table-wide hash schema. 
+    {
+      PartialRow lower = schema.newPartialRow();
+      lower.addInt(0, -100);
+      PartialRow upper = schema.newPartialRow();
+      upper.addInt(0, 100);
+      builder.addRangePartition(lower, upper);
+    }
+
+    client.createTable(tableName, schema, builder);
+
+    assertEquals(
+        ImmutableList.of("-100 <= VALUES < 100"),
+        client.openTable(tableName).getFormattedRangePartitions(10000));
+    assertEquals(
+        ImmutableList.of("-100 <= VALUES < 100 HASH(key) PARTITIONS 2"),
+        client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000));
+
+    {
+      PartialRow lower = schema.newPartialRow();
+      lower.addInt(0, 100);
+      PartialRow upper = schema.newPartialRow();
+      upper.addInt(0, 200);
+
+      RangePartitionWithCustomHashSchema range =
+          new RangePartitionWithCustomHashSchema(
+              lower,
+              upper,
+              RangePartitionBound.INCLUSIVE_BOUND,
+              RangePartitionBound.EXCLUSIVE_BOUND);
+      range.addHashPartitions(ImmutableList.of("key"), 7, 0);
+
+      client.alterTable(
+          tableName, new AlterTableOptions().addRangePartition(range));
+    }
+
+    final KuduTable table = client.openTable(tableName);
+
+    assertEquals(
+        ImmutableList.of("-100 <= VALUES < 100", "100 <= VALUES < 200"),
+        client.openTable(tableName).getFormattedRangePartitions(10000));
+    assertEquals(
+        ImmutableList.of(
+            "-100 <= VALUES < 100 HASH(key) PARTITIONS 2",
+            "100 <= VALUES < 200 HASH(key) PARTITIONS 7"),
+        client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000));
+
+    final PartitionSchema ps = client.openTable(tableName).getPartitionSchema();
+    assertTrue(ps.hasCustomHashSchemas());
+
+    {
+      // NOTE: use schema from server since ColumnIDs are needed for row encoding
+      final PartialRow rowLower = table.getSchema().newPartialRow();
+      rowLower.addInt(0, -100);
+
+      final PartialRow rowUpper = table.getSchema().newPartialRow();
+      rowUpper.addInt(0, 100);
+
+      // There should be 2 tablets for the range with the table-wide hash schema
+      // that was added when the table was created.
+      {
+        final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+            KeyEncoder.encodeRangePartitionKey(rowLower, ps.getRangeSchema()));
+        // There should be just one dimension with 2 buckets.
+        assertEquals(1, s.size());
+        assertEquals(2, s.get(0).getNumBuckets());
+      }
+      {
+        final byte[] rowLowerEnc = ps.encodePartitionKey(rowLower);
+        final byte[] rowUpperEnc = ps.encodePartitionKey(rowUpper);
+
+        // The range part comes after the hash part in an encoded partition key.
+        // The hash part contains 4 * number_of_hash_dimensions bytes.
+        byte[] hashLower = Arrays.copyOfRange(rowLowerEnc, 4, rowLowerEnc.length);
+        byte[] hashUpper = Arrays.copyOfRange(rowUpperEnc, 4, rowUpperEnc.length);
+
+        Set<Integer> buckets = new HashSet<>();
+        for (KuduScanToken token : new KuduScanToken.KuduScanTokenBuilder(asyncClient, table)
+            .addPredicate(KuduPredicate.newComparisonPredicate(
+                columns.get(0), KuduPredicate.ComparisonOp.GREATER_EQUAL, -100))
+            .addPredicate(KuduPredicate.newComparisonPredicate(
+                columns.get(0), KuduPredicate.ComparisonOp.LESS, 100))
+            .setTimeout(client.getDefaultOperationTimeoutMs()).build()) {
+          final Partition p = token.getTablet().getPartition();
+          assertEquals(0, Bytes.memcmp(p.getRangeKeyStart(), hashLower));
+          assertEquals(0, Bytes.memcmp(p.getRangeKeyEnd(), hashUpper));
+          assertEquals(1, p.getHashBuckets().size());
+          buckets.add(p.getHashBuckets().get(0));
+        }
+
+        // Check that the generated scan tokens cover all the tablets for the range:
+        // all hash bucket indices should be present.
+        assertEquals(2, buckets.size());
+        for (int i = 0; i < buckets.size(); ++i) {
+          assertTrue(String.format("must have bucket %d", i), buckets.contains(i));
+        }
+      }
+    }
+
+    {
+      // NOTE: use schema from server since ColumnIDs are needed for row encoding
+      final PartialRow rowLower = table.getSchema().newPartialRow();
+      rowLower.addInt(0, 100);
+
+      final PartialRow rowUpper = table.getSchema().newPartialRow();
+      rowUpper.addInt(0, 200);
+
+      // There should be 7 tablets for the newly added range with 7 hash buckets.
+      {
+        final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+            KeyEncoder.encodeRangePartitionKey(rowLower, ps.getRangeSchema()));
+        // There should be just one dimension with 7 buckets.
+        assertEquals(1, s.size());
+        assertEquals(7, s.get(0).getNumBuckets());
+      }
+      {
+        final byte[] rowLowerEnc = ps.encodePartitionKey(rowLower);
+        final byte[] rowUpperEnc = ps.encodePartitionKey(rowUpper);
+
+        // The range part comes after the hash part in an encoded partition key.
+        // The hash part contains 4 * number_of_hash_dimensions bytes.
+        byte[] hashLower = Arrays.copyOfRange(rowLowerEnc, 4, rowLowerEnc.length);
+        byte[] hashUpper = Arrays.copyOfRange(rowUpperEnc, 4, rowUpperEnc.length);
+
+        Set<Integer> buckets = new HashSet<>();
+        for (KuduScanToken token : new KuduScanToken.KuduScanTokenBuilder(asyncClient, table)
+            .addPredicate(KuduPredicate.newComparisonPredicate(
+                columns.get(0), KuduPredicate.ComparisonOp.GREATER_EQUAL, 100))
+            .addPredicate(KuduPredicate.newComparisonPredicate(
+                columns.get(0), KuduPredicate.ComparisonOp.LESS, 200))
+            .setTimeout(client.getDefaultOperationTimeoutMs()).build()) {
+          final Partition p = token.getTablet().getPartition();
+          assertEquals(0, Bytes.memcmp(p.getRangeKeyStart(), hashLower));
+          assertEquals(0, Bytes.memcmp(p.getRangeKeyEnd(), hashUpper));
+          assertEquals(1, p.getHashBuckets().size());
+          buckets.add(p.getHashBuckets().get(0));
+        }
+
+        // Check that the generated scan tokens cover all the tablets for the range:
+        // all hash bucket indices should be present.
+        assertEquals(7, buckets.size());
+        for (int i = 0; i < buckets.size(); ++i) {
+          assertTrue(String.format("must have bucket %d", i), buckets.contains(i));
+        }
+      }
+    }
+
+    // Make sure it's possible to insert into the newly added range.
+    KuduSession session = client.newSession();
+    {
+      for (int key = 0; key < 9; ++key) {
+        insertDefaultRow(table, session, key);
+      }
+      session.flush();
+
+      List<String> rowStrings = scanTableToStrings(table);
+      assertEquals(9, rowStrings.size());
+      for (int i = 0; i < rowStrings.size(); i++) {
+        StringBuilder expectedRow = new StringBuilder();
+        expectedRow.append(String.format("INT32 key=%d, STRING value=NULL", i));
+        assertEquals(expectedRow.toString(), rowStrings.get(i));
+      }
+
+      rowStrings = scanTableToStrings(table,
+          KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER_EQUAL, 8));
+      assertEquals(1, rowStrings.size());
+      StringBuilder expectedRow = new StringBuilder();
+      expectedRow.append(String.format("INT32 key=8, STRING value=NULL"));
+      assertEquals(expectedRow.toString(), rowStrings.get(0));
+    }
+
+    // Insert more rows: those should go into both ranges -- the range with
+    // the table-wide hash schema and the newly added range with the custom
+    // hash schema.
+ { + for (int key = 9; key < 200; ++key) { + insertDefaultRow(table, session, key); + } + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(200, rowStrings.size()); + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER_EQUAL, 100)); + assertEquals(100, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER_EQUAL, 180)); + assertEquals(20, rowStrings.size()); + } + + // Insert more rows into the range with table-wide hash schema. + { + for (int key = -100; key < 0; ++key) { + insertDefaultRow(table, session, key); + } + session.flush(); + + List rowStrings = scanTableToStrings(table); + assertEquals(300, rowStrings.size()); + + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), LESS, 0)); + assertEquals(100, rowStrings.size()); + + // Predicate to have one part of the rows in the range with table-wide hash + // schema, and the other part from the range with custom hash schema. + rowStrings = scanTableToStrings(table, + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), GREATER_EQUAL, 50), + KuduPredicate.newComparisonPredicate(schema.getColumn("key"), LESS, 150)); + assertEquals(100, rowStrings.size()); + } + } + + @Test(timeout = 100000) + @KuduTestHarness.MasterServerConfig(flags = { + "--enable_per_range_hash_schemas=true", + }) + public void testAlterTableAddRangePartitionCustomHashSchemaMultiDimensional() + throws Exception { + final List columns = ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT64).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).nullable(true).build()); + final Schema schema = new Schema(columns); + + CreateTableOptions opt = new CreateTableOptions(); + opt.setRangePartitionColumns(ImmutableList.of("c0")); + // Define table-wide schema for the table. + opt.addHashPartitions(ImmutableList.of("c0"), 2, 0); + opt.addHashPartitions(ImmutableList.of("c1"), 3, 0); + + // Add a range partition with table-wide hash schema. 
+    {
+      PartialRow lower = schema.newPartialRow();
+      lower.addInt(0, -100);
+      PartialRow upper = schema.newPartialRow();
+      upper.addInt(0, 100);
+      opt.addRangePartition(lower, upper);
+    }
+
+    client.createTable(tableName, schema, opt);
+
+    assertEquals(
+        ImmutableList.of("-100 <= VALUES < 100"),
+        client.openTable(tableName).getFormattedRangePartitions(10000));
+    assertEquals(
+        ImmutableList.of("-100 <= VALUES < 100 HASH(c0) PARTITIONS 2 HASH(c1) PARTITIONS 3"),
+        client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000));
+
+    {
+      PartialRow lower = schema.newPartialRow();
+      lower.addInt(0, 100);
+      PartialRow upper = schema.newPartialRow();
+      upper.addInt(0, 200);
+
+      RangePartitionWithCustomHashSchema range =
+          new RangePartitionWithCustomHashSchema(
+              lower,
+              upper,
+              RangePartitionBound.INCLUSIVE_BOUND,
+              RangePartitionBound.EXCLUSIVE_BOUND);
+      range.addHashPartitions(ImmutableList.of("c0"), 5, 0);
+      range.addHashPartitions(ImmutableList.of("c1"), 3, 0);
+
+      client.alterTable(
+          tableName, new AlterTableOptions().addRangePartition(range));
+    }
+
+    assertEquals(
+        ImmutableList.of("-100 <= VALUES < 100", "100 <= VALUES < 200"),
+        client.openTable(tableName).getFormattedRangePartitions(10000));
+    assertEquals(
+        ImmutableList.of(
+            "-100 <= VALUES < 100 HASH(c0) PARTITIONS 2 HASH(c1) PARTITIONS 3",
+            "100 <= VALUES < 200 HASH(c0) PARTITIONS 5 HASH(c1) PARTITIONS 3"),
+        client.openTable(tableName).getFormattedRangePartitionsWithHashSchema(10000));
+
+    final PartitionSchema ps = client.openTable(tableName).getPartitionSchema();
+    assertTrue(ps.hasCustomHashSchemas());
+
+    final KuduTable table = client.openTable(tableName);
+    {
+      // NOTE: use schema from server since ColumnIDs are needed for row encoding
+      // NOTE: setting both 'c0' and 'c1' columns since they are used in
+      // the hash bucketing schema
+      final PartialRow rowLower = table.getSchema().newPartialRow();
+      rowLower.addInt(0, -100);
+      rowLower.addLong(1, -100);
+
+      final PartialRow rowUpper = table.getSchema().newPartialRow();
+      rowUpper.addInt(0, 100);
+      rowUpper.addLong(1, 100);
+
+      // There should be 6 tablets for the range with the table-wide hash schema
+      // that was added when the table was created.
+      {
+        final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+            KeyEncoder.encodeRangePartitionKey(rowLower, ps.getRangeSchema()));
+        // There should be just two dimensions: one with 2 buckets and another
+        // with 3 buckets.
+        assertEquals(2, s.size());
+        assertEquals(2, s.get(0).getNumBuckets());
+        assertEquals(3, s.get(1).getNumBuckets());
+      }
+      {
+        final byte[] rowLowerEnc = ps.encodePartitionKey(rowLower);
+        final byte[] rowUpperEnc = ps.encodePartitionKey(rowUpper);
+
+        // The range part comes after the hash part in an encoded partition key.
+        // The hash part contains 4 * number_of_hash_dimensions bytes.
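+        // With the two hash dimensions used here, that is 4 * 2 = 8 leading
+        // bytes to skip before the range-key portion (illustrative note).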
+ byte[] rangeLower = Arrays.copyOfRange(rowLowerEnc, 4 * 2, rowLowerEnc.length);
+ byte[] rangeUpper = Arrays.copyOfRange(rowUpperEnc, 4 * 2, rowUpperEnc.length);
+
+ Set<Pair<Integer, Integer>> buckets = new HashSet<>();
+ for (KuduScanToken token : new KuduScanToken.KuduScanTokenBuilder(asyncClient, table)
+ .addPredicate(KuduPredicate.newComparisonPredicate(
+ columns.get(0), KuduPredicate.ComparisonOp.GREATER_EQUAL, -100))
+ .addPredicate(KuduPredicate.newComparisonPredicate(
+ columns.get(0), KuduPredicate.ComparisonOp.LESS, 100))
+ .setTimeout(client.getDefaultOperationTimeoutMs()).build()) {
+ final Partition p = token.getTablet().getPartition();
+ assertEquals(0, Bytes.memcmp(p.getRangeKeyStart(), rangeLower));
+ assertEquals(0, Bytes.memcmp(p.getRangeKeyEnd(), rangeUpper));
+ assertEquals(2, p.getHashBuckets().size());
+ buckets.add(new Pair<>(p.getHashBuckets().get(0), p.getHashBuckets().get(1)));
+ }
+
+ // Check that the generated scan tokens cover all the tablets for the range:
+ // all hash bucket indices should be present.
+ final Set<Pair<Integer, Integer>> refBuckets = ImmutableSet.of(
+ new Pair<>(0, 0), new Pair<>(0, 1), new Pair<>(0, 2),
+ new Pair<>(1, 0), new Pair<>(1, 1), new Pair<>(1, 2));
+ assertEquals(refBuckets, buckets);
+ }
+ }
+
+ {
+ // NOTE: use schema from server since ColumnIDs are needed for row encoding
+ // NOTE: setting both 'c0' and 'c1' columns since they are used in
+ // the hash bucketing schema
+ final PartialRow rowLower = table.getSchema().newPartialRow();
+ rowLower.addInt(0, 100);
+ rowLower.addLong(1, 100);
+
+ final PartialRow rowUpper = table.getSchema().newPartialRow();
+ rowUpper.addInt(0, 200);
+ rowUpper.addLong(1, 200);
+
+ // There should be 15 tablets for the newly added range.
+ {
+ final List<PartitionSchema.HashBucketSchema> s = ps.getHashSchemaForRange(
+ KeyEncoder.encodeRangePartitionKey(rowLower, ps.getRangeSchema()));
+ // There should be just two dimensions with 5 * 3 = 15 buckets in total.
+ assertEquals(2, s.size());
+ assertEquals(5, s.get(0).getNumBuckets());
+ assertEquals(3, s.get(1).getNumBuckets());
+ }
+ {
+ final byte[] rowLowerEnc = ps.encodePartitionKey(rowLower);
+ final byte[] rowUpperEnc = ps.encodePartitionKey(rowUpper);
+
+ // The range part comes after the hash part in an encoded partition key.
+ // The hash part contains 4 * number_of_hash_dimensions bytes.
+ byte[] rangeLower = Arrays.copyOfRange(rowLowerEnc, 4 * 2, rowLowerEnc.length);
+ byte[] rangeUpper = Arrays.copyOfRange(rowUpperEnc, 4 * 2, rowUpperEnc.length);
+
+ Set<Pair<Integer, Integer>> buckets = new HashSet<>();
+ for (KuduScanToken token : new KuduScanToken.KuduScanTokenBuilder(asyncClient, table)
+ .addPredicate(KuduPredicate.newComparisonPredicate(
+ columns.get(0), KuduPredicate.ComparisonOp.GREATER_EQUAL, 100))
+ .addPredicate(KuduPredicate.newComparisonPredicate(
+ columns.get(0), KuduPredicate.ComparisonOp.LESS, 200))
+ .setTimeout(client.getDefaultOperationTimeoutMs()).build()) {
+ final Partition p = token.getTablet().getPartition();
+ assertEquals(0, Bytes.memcmp(p.getRangeKeyStart(), rangeLower));
+ assertEquals(0, Bytes.memcmp(p.getRangeKeyEnd(), rangeUpper));
+ assertEquals(2, p.getHashBuckets().size());
+ buckets.add(new Pair<>(p.getHashBuckets().get(0), p.getHashBuckets().get(1)));
+ }
+
+ // Check that the generated scan tokens cover all the tablets for the range:
+ // all hash bucket indices should be present.
+ final Set<Pair<Integer, Integer>> refBuckets = ImmutableSet.of(
+ new Pair<>(0, 0), new Pair<>(0, 1), new Pair<>(0, 2),
+ new Pair<>(1, 0), new Pair<>(1, 1), new Pair<>(1, 2),
+ new Pair<>(2, 0), new Pair<>(2, 1), new Pair<>(2, 2),
+ new Pair<>(3, 0), new Pair<>(3, 1), new Pair<>(3, 2),
+ new Pair<>(4, 0), new Pair<>(4, 1), new Pair<>(4, 2));
+ assertEquals(refBuckets, buckets);
+ }
+ }
+
+ // Make sure it's possible to insert into the newly added range.
+ KuduSession session = client.newSession();
+
+ // Insert rows: these should go into both ranges -- the range with the
+ // table-wide hash schema and the newly added range with the custom hash schema.
+ {
+ for (int key = 0; key < 200; ++key) {
+ Insert insert = table.newInsert();
+ PartialRow row = insert.getRow();
+ row.addInt("c0", key);
+ row.addLong("c1", key);
+ session.apply(insert);
+ }
+ session.flush();
+
+ List<String> rowStrings = scanTableToStrings(table);
+ assertEquals(200, rowStrings.size());
+ rowStrings = scanTableToStrings(table,
+ KuduPredicate.newComparisonPredicate(schema.getColumn("c0"), GREATER_EQUAL, 100));
+ assertEquals(100, rowStrings.size());
+
+ rowStrings = scanTableToStrings(table,
+ KuduPredicate.newComparisonPredicate(schema.getColumn("c0"), GREATER_EQUAL, 180));
+ assertEquals(20, rowStrings.size());
+ }
+
+ // Insert more rows into the range with the table-wide hash schema.
+ {
+ for (int key = -100; key < 0; ++key) {
+ Insert insert = table.newInsert();
+ PartialRow row = insert.getRow();
+ row.addInt("c0", key);
+ row.addLong("c1", key);
+ session.apply(insert);
+ }
+ session.flush();
+
+ List<String> rowStrings = scanTableToStrings(table);
+ assertEquals(300, rowStrings.size());
+
+ rowStrings = scanTableToStrings(table,
+ KuduPredicate.newComparisonPredicate(schema.getColumn("c0"), LESS, 0));
+ assertEquals(100, rowStrings.size());
+
+ // Predicates selecting rows partly from the range with the table-wide hash
+ // schema and partly from the range with the custom hash schema.
+ rowStrings = scanTableToStrings(table,
+ KuduPredicate.newComparisonPredicate(schema.getColumn("c0"), GREATER_EQUAL, 50),
+ KuduPredicate.newComparisonPredicate(schema.getColumn("c0"), LESS, 150));
+ assertEquals(100, rowStrings.size());
+ }
+ }
+
+ @Test(timeout = 100000)
+ @SuppressWarnings("deprecation")
+ public void testDimensionLabel() throws Exception {
+ // Create a table with a dimension label.
+ final KuduTable table = client.createTable(tableName, basicSchema,
+ getBasicTableOptionsWithNonCoveredRange().setDimensionLabel("labelA"));
+
+ // Add a range partition to the table with a dimension label.
+ AlterTableOptions ato = new AlterTableOptions();
+ PartialRow lowerBound = BASIC_SCHEMA.newPartialRow();
+ lowerBound.addInt("key", 300);
+ PartialRow upperBound = BASIC_SCHEMA.newPartialRow();
+ upperBound.addInt("key", 400);
+ ato.addRangePartition(lowerBound, upperBound, "labelB",
+ RangePartitionBound.INCLUSIVE_BOUND,
+ RangePartitionBound.EXCLUSIVE_BOUND);
+ client.alterTable(tableName, ato);
+
+ Map<String, Integer> dimensionMap = new HashMap<>();
+ for (LocatedTablet tablet : table.getTabletsLocations(DEFAULT_SLEEP)) {
+ for (LocatedTablet.Replica replica : tablet.getReplicas()) {
+ Integer number = dimensionMap.get(replica.getDimensionLabel());
+ if (number == null) {
+ number = 0;
+ }
+ dimensionMap.put(replica.getDimensionLabel(), number + 1);
+ }
+ }
+ assertEquals(9, dimensionMap.get("labelA").intValue());
+ assertEquals(3, dimensionMap.get("labelB").intValue());
+ }
+
+ @Test(timeout = 100000)
+ @KuduTestHarness.TabletServerConfig(flags = {
+ "--update_tablet_stats_interval_ms=200",
+ "--heartbeat_interval_ms=100",
+ })
+ public void testGetTableStatistics() throws Exception {
+ // Create a table.
+ CreateTableOptions builder = getBasicCreateTableOptions();
+ KuduTable table = client.createTable(tableName, BASIC_SCHEMA, builder);
+
+ // Insert some rows and test the statistics.
+ KuduTableStatistics prevStatistics = new KuduTableStatistics(-1, -1);
+ KuduTableStatistics currentStatistics;
+ KuduSession session = client.newSession();
+ int num = 100;
+ for (int i = 0; i < num; ++i) {
+ // Get the current table statistics.
+ currentStatistics = table.getTableStatistics();
+ assertTrue(currentStatistics.getOnDiskSize() >= prevStatistics.getOnDiskSize());
+ assertTrue(currentStatistics.getLiveRowCount() >= prevStatistics.getLiveRowCount());
+ assertTrue(currentStatistics.getLiveRowCount() <= i + 1);
+ prevStatistics = currentStatistics;
+ // Insert a row.
+ Insert insert = createBasicSchemaInsert(table, i);
+ session.apply(insert);
+ List<String> rows = scanTableToStrings(table);
+ assertEquals("wrong number of rows", i + 1, rows.size());
+ }
+
+ // Final accuracy test.
+ // Wait for the master to aggregate the table statistics.
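+ // The tablet statistics update interval is set to 200 ms via the
+ // --update_tablet_stats_interval_ms flag above, so sleeping for several
+ // intervals should give the master enough time to aggregate the final values.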
+ Thread.sleep(200 * 6); + currentStatistics = table.getTableStatistics(); + assertTrue(currentStatistics.getOnDiskSize() >= prevStatistics.getOnDiskSize()); + assertTrue(currentStatistics.getLiveRowCount() >= prevStatistics.getLiveRowCount()); + assertEquals(num, currentStatistics.getLiveRowCount()); + } + + @Test(timeout = 100000) + @KuduTestHarness.MasterServerConfig(flags = { + "--master_support_immutable_column_attribute=false" + }) + public void testCreateTableWithImmuColsWhenMasterNotSupport() throws Exception { + try { + CreateTableOptions builder = getBasicCreateTableOptions(); + client.createTable(tableName, createSchemaWithImmutableColumns(), builder); + fail("shouldn't be able to create a table with immutable columns " + + "when server side doesn't support required IMMUTABLE_COLUMN_ATTRIBUTE feature"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isRemoteError()); + assertTrue(errmsg, errmsg.matches( + ".* server sent error unsupported feature flags")); + } + } + + @Test(timeout = 100000) + @KuduTestHarness.MasterServerConfig(flags = { + "--master_support_immutable_column_attribute=false" + }) + public void testAlterTableAddImmuColsWhenMasterNotSupport() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + client.createTable(tableName, BASIC_SCHEMA, builder); + final ColumnSchema immu_col = new ColumnSchema.ColumnSchemaBuilder("immu_col", Type.INT32) + .nullable(true).immutable(true).build(); + try { + client.alterTable(tableName, new AlterTableOptions().addColumn(immu_col)); + fail("shouldn't be able to alter a table to add a column with immutable attribute " + + "when server side doesn't support required IMMUTABLE_COLUMN_ATTRIBUTE feature"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isRemoteError()); + assertTrue(errmsg, errmsg.matches( + ".* server sent error unsupported feature flags")); + } + } + + @Test(timeout = 100000) + @KuduTestHarness.MasterServerConfig(flags = { + "--master_support_immutable_column_attribute=false" + }) + public void testAlterTableAlterImmuColsWhenMasterNotSupport() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + client.createTable(tableName, BASIC_SCHEMA, builder); + try { + client.alterTable(tableName, new AlterTableOptions().changeImmutable("column1_i", true)); + fail("shouldn't be able to alter a table to change the immutable attribute on a column " + + "when server side doesn't support required IMMUTABLE_COLUMN_ATTRIBUTE feature"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isRemoteError()); + assertTrue(errmsg, errmsg.matches( + ".* server sent error unsupported feature flags")); + } + + // No matter if the table has an immutable attribute column or not, we can test the function + // on the client side, the request will be processed by the generic RPC code and throw an + // exception before reaching particular application code. 
+ try { + client.alterTable(tableName, new AlterTableOptions().changeImmutable("column1_i", false)); + fail("shouldn't be able to alter a table to change the immutable attribute on a column " + + "when server side doesn't support required IMMUTABLE_COLUMN_ATTRIBUTE feature"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isRemoteError()); + assertTrue(errmsg, errmsg.matches( + ".* server sent error unsupported feature flags")); + } + } + + /** + * Test creating table schemas with non unique primary key columns and + * auto-incrementing columns. + */ + @Test(timeout = 100000) + public void testCreateSchemaWithNonUniquePrimaryKeys() throws Exception { + // Create a schema with two non unique primary key columns and + // verify the resulting table's schema. + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32) + .nonUniqueKey(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key2", Type.INT64) + .nonUniqueKey(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(true).build()); + Schema schema = new Schema(columns); + assertFalse(schema.isPrimaryKeyUnique()); + assertTrue(schema.hasAutoIncrementingColumn()); + assertEquals(4, schema.getColumnCount()); + assertEquals(3, schema.getPrimaryKeyColumnCount()); + client.createTable(tableName, schema, getBasicCreateTableOptions()); + KuduTable table = client.openTable(tableName); + schema = table.getSchema(); + assertFalse(schema.isPrimaryKeyUnique()); + assertTrue(schema.hasAutoIncrementingColumn()); + assertEquals(4, schema.getColumnCount()); + assertEquals(3, schema.getPrimaryKeyColumnCount()); + client.deleteTable(tableName); + + // Create a schema with non unique primary key column and unique primary key column + columns.clear(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32) + .nonUniqueKey(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key2", Type.INT32) + .key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(true).build()); + try { + new Schema(columns); + fail("Schema with mixture of unique key and non unique key"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains( + "Mixture of unique key and non unique key in a table")); + } + + // Create a schema with an auto-incrementing column which is marked as non unique + // primary key and verify the resulting table's schema. 
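+ // The auto-incrementing column produced by AutoIncrementingColumnSchemaBuilder
+ // is implicitly part of the (non-unique) primary key, which is reflected in the
+ // primary key column counts asserted below.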
+ columns.clear(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32) + .nonUniqueKey(true).build()); + columns.add(new ColumnSchema.AutoIncrementingColumnSchemaBuilder().build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(true).build()); + schema = new Schema(columns); + assertTrue(schema.hasAutoIncrementingColumn()); + assertFalse(schema.isPrimaryKeyUnique()); + assertEquals(3, schema.getColumnCount()); + assertEquals(2, schema.getPrimaryKeyColumnCount()); + client.createTable(tableName, schema, getBasicCreateTableOptions()); + table = client.openTable(tableName); + schema = table.getSchema(); + assertTrue(schema.hasAutoIncrementingColumn()); + assertFalse(schema.isPrimaryKeyUnique()); + assertEquals(3, schema.getColumnCount()); + assertEquals(2, schema.getPrimaryKeyColumnCount()); + client.deleteTable(tableName); + + // Create a schema with a single auto-incrementing column which is marked as non + // unique primary key, and verify the resulting table's schema. + columns.clear(); + columns.add(new ColumnSchema.AutoIncrementingColumnSchemaBuilder().build()); + schema = new Schema(columns); + assertTrue(schema.hasAutoIncrementingColumn()); + assertFalse(schema.isPrimaryKeyUnique()); + assertEquals(1, schema.getColumnCount()); + assertEquals(1, schema.getPrimaryKeyColumnCount()); + CreateTableOptions builder = new CreateTableOptions(); + builder.setRangePartitionColumns(ImmutableList.of(Schema.getAutoIncrementingColumnName())); + client.createTable(tableName, schema, builder); + table = client.openTable(tableName); + schema = table.getSchema(); + assertTrue(schema.hasAutoIncrementingColumn()); + assertFalse(schema.isPrimaryKeyUnique()); + assertEquals(1, schema.getColumnCount()); + assertEquals(1, schema.getPrimaryKeyColumnCount()); + client.deleteTable(tableName); + + // Create a schema with two auto-incrementing columns + columns.clear(); + columns.add(new ColumnSchema.AutoIncrementingColumnSchemaBuilder().build()); + columns.add(new ColumnSchema.AutoIncrementingColumnSchemaBuilder().build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32) + .nullable(true).build()); + try { + new Schema(columns); + fail("Schema with two auto-incrementing columns"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains( + "More than one columns are set as auto-incrementing columns")); + } + } + + @Test(timeout = 100000) + @KuduTestHarness.MasterServerConfig(flags = { + "--master_support_auto_incrementing_column=false" + }) + public void testCreateTableWithAutoIncrementingColWhenMasterNotSupport() throws Exception { + try { + CreateTableOptions builder = getBasicCreateTableOptions(); + client.createTable(tableName, createSchemaWithNonUniqueKey(), builder); + fail("shouldn't be able to create a table with auto-incrementing column " + + "when server side doesn't support required AUTO_INCREMENTING_COLUMN feature"); + } catch (KuduException ex) { + final String errmsg = ex.getMessage(); + assertTrue(errmsg, ex.getStatus().isRemoteError()); + assertTrue(errmsg, errmsg.matches( + ".* server sent error unsupported feature flags")); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTransaction.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTransaction.java new file mode 100644 index 0000000000..f5430b411c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTransaction.java @@ -0,0 
+1,1687 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.junit.AssertHelpers.assertEventuallyTrue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.collect.ImmutableList; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.function.ThrowingRunnable; + +import org.apache.kudu.test.ClientTestUtil; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.KuduTestHarness.MasterServerConfig; +import org.apache.kudu.test.KuduTestHarness.TabletServerConfig; +import org.apache.kudu.test.junit.AssertHelpers.BooleanExpression; +import org.apache.kudu.transactions.Transactions.TxnTokenPB; + + +public class TestKuduTransaction { + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + private KuduTransaction makeFakeTransaction(KuduTransaction txn) throws IOException { + byte[] buf = txn.serialize(); + final TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasTxnId()); + final long txnId = pb.getTxnId(); + assertTrue(txnId > AsyncKuduClient.INVALID_TXN_ID); + + final long fakeTxnId = txnId + 123; + + TxnTokenPB.Builder b = TxnTokenPB.newBuilder(); + b.setTxnId(fakeTxnId); + b.setEnableKeepalive(false); + b.setKeepaliveMillis(0); + TxnTokenPB message = b.build(); + byte[] fakeTxnBuf = new byte[message.getSerializedSize()]; + CodedOutputStream cos = CodedOutputStream.newInstance(fakeTxnBuf); + message.writeTo(cos); + cos.flush(); + return KuduTransaction.deserialize(fakeTxnBuf, asyncClient); + } + + /** + * Test scenario that starts a new transaction given an instance of + * KuduClient. The purpose of this test is to make sure it's possible + * to start a new transaction given a KuduClient object. 
+ */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testNewTransaction() throws Exception { + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + byte[] buf = txn.serialize(); + assertNotNull(buf); + final TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasTxnId()); + assertTrue(pb.getTxnId() > AsyncKuduClient.INVALID_TXN_ID); + assertTrue(pb.hasEnableKeepalive()); + // By default, keepalive is disabled for a serialized txn token. + assertFalse(pb.getEnableKeepalive()); + assertTrue(pb.hasKeepaliveMillis()); + assertTrue(pb.getKeepaliveMillis() > 0); + } + + /** + * Test scenario that starts many new transaction given an instance of + * KuduClient. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testStartManyTransactions() throws Exception { + List transactions = new ArrayList<>(); + for (int i = 0; i < 1000; ++i) { + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + transactions.add(txn); + } + for (KuduTransaction txn : transactions) { + txn.rollback(); + } + } + + /** + * Test scenario that starts a new transaction and rolls it back. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testRollbackAnEmptyTransaction() throws Exception { + KuduTransaction txn = client.newTransaction(); + txn.rollback(); + // A duplicate call to rollback an aborted transaction using the same + // handle should report an error. + IllegalStateException ex = assertThrows( + IllegalStateException.class, new ThrowingRunnable() { + @Override + public void run() throws Throwable { + txn.rollback(); + } + }); + assertEquals("transaction is not open for this handle", ex.getMessage()); + + // Try to rollback the same transaction using another handle that has been + // constructed using serialize/deserialize sequence: it should be fine + // since aborting a transaction has idempotent semantics for the back-end. + byte[] buf = txn.serialize(); + KuduTransaction serdesTxn = KuduTransaction.deserialize(buf, asyncClient); + serdesTxn.rollback(); + } + + /** + * Test scenario that starts a new transaction and commits it right away. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--txn_schedule_background_tasks=false", + "--enable_txn_system_client_init=true", + }) + public void testCommitAnEmptyTransaction() throws Exception { + KuduTransaction txn = client.newTransaction(); + txn.startCommit(); + // A duplicate call to commit the transaction using the same handle + // should fail. + IllegalStateException ex = assertThrows( + IllegalStateException.class, new ThrowingRunnable() { + @Override + public void run() throws Throwable { + txn.startCommit(); + } + }); + assertEquals("transaction is not open for this handle", ex.getMessage()); + + // Try to commit the same transaction using another handle that has been + // constructed using serialize/deserialize sequence: it should be fine + // since committing a transaction has idempotent semantics for the back-end. 
+ byte[] buf = txn.serialize(); + KuduTransaction serdesTxn = KuduTransaction.deserialize(buf, asyncClient); + serdesTxn.startCommit(); + } + + /** + * Test scenario that tries to commit a non-existent transaction. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testCommitNonExistentTransaction() throws Exception { + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + KuduTransaction fakeTxn = makeFakeTransaction(txn); + try { + // Try to commit the transaction in non-synchronous mode, i.e. just + // initiate committing the transaction. + fakeTxn.startCommit(); + fail("committing a non-existing transaction should have failed"); + } catch (NonRecoverableException e) { + final String errmsg = e.getMessage(); + final Status status = e.getStatus(); + assertTrue(status.toString(), status.isInvalidArgument()); + assertTrue(errmsg, errmsg.matches(".*transaction ID .* not found.*")); + } catch (Exception e) { + fail("unexpected exception: " + e.toString()); + } + + try { + // Try to commit the transaction in synchronous mode, i.e. initiate + // committing the transaction and wait for the commit phase to finalize. + fakeTxn.commit(); + fail("committing a non-existing transaction should have failed"); + } catch (NonRecoverableException e) { + final String errmsg = e.getMessage(); + final Status status = e.getStatus(); + assertTrue(status.toString(), status.isInvalidArgument()); + assertTrue(errmsg, errmsg.matches(".*transaction ID .* not found.*")); + } catch (Exception e) { + fail("unexpected exception: " + e.toString()); + } + } + + /** + * Transactional sessions can be closed as regular ones. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testTxnSessionClose() throws Exception { + final String TABLE_NAME = "txn_session_close"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduTable table = client.openTable(TABLE_NAME); + + // Open and close an empty transaction session. + { + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + KuduSession session = txn.newKuduSession(); + assertNotNull(session); + assertFalse(session.isClosed()); + session.close(); + assertTrue(session.isClosed()); + } + + // Open new transaction, insert one row for a session, close the session + // and then rollback the transaction. No rows should be persisted. + { + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + KuduSession session = txn.newKuduSession(); + assertNotNull(session); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + Insert insert = createBasicSchemaInsert(table, 1); + session.apply(insert); + session.close(); + + txn.rollback(); + + assertTrue(session.isClosed()); + assertEquals(0, session.countPendingErrors()); + + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + assertEquals(0, countRowsInScan(scanner)); + } + } + + /** + * Test scenario that starts a new transaction, initiates its commit phase, + * and checks whether the commit is complete using the + * KuduTransaction.isCommitComplete() method. 
+ */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--txn_schedule_background_tasks=false", + "--txn_status_manager_inject_latency_finalize_commit_ms=1000", + "--enable_txn_system_client_init=true", + }) + public void testIsCommitComplete() throws Exception { + KuduTransaction txn = client.newTransaction(); + txn.startCommit(); + assertFalse(txn.isCommitComplete()); + } + + /** + * Verify how KuduTransaction.isCommitComplete() works for a transaction handle + * in a few special cases. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--txn_schedule_background_tasks=false", + "--enable_txn_system_client_init=true", + }) + public void testIsCommitCompleteSpecialCases() throws Exception { + KuduTransaction txn = client.newTransaction(); + + { + NonRecoverableException ex = assertThrows( + NonRecoverableException.class, new ThrowingRunnable() { + @Override + public void run() throws Throwable { + txn.isCommitComplete(); + } + }); + assertTrue(ex.getStatus().isIllegalState()); + assertEquals("transaction is still open", ex.getMessage()); + } + + // Rollback the transaction. + txn.rollback(); + + { + NonRecoverableException ex = assertThrows( + NonRecoverableException.class, new ThrowingRunnable() { + @Override + public void run() throws Throwable { + txn.isCommitComplete(); + } + }); + assertTrue(ex.getStatus().isAborted()); + assertEquals("transaction is being aborted", ex.getMessage()); + } + + // Try to call isCommitComplete() on a handle that isn't backed by any + // transaction registered with the system. + { + KuduTransaction fakeTxn = makeFakeTransaction(txn); + NonRecoverableException ex = assertThrows( + NonRecoverableException.class, new ThrowingRunnable() { + @Override + public void run() throws Throwable { + fakeTxn.isCommitComplete(); + } + }); + final Status status = ex.getStatus(); + assertTrue(status.toString(), status.isInvalidArgument()); + final String errmsg = ex.getMessage(); + assertTrue(errmsg, errmsg.matches(".*transaction ID .* not found.*")); + } + } + + /** + * Test scenario that starts a new empty transaction and commits it in a + * synchronous way (i.e. waits for the transaction to be committed). + * + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testCommitAnEmptyTransactionWait() throws Exception { + KuduTransaction txn = client.newTransaction(); + txn.commit(); + assertTrue(txn.isCommitComplete()); + } + + /** + * Test scenario that tries to rollback a non-existent transaction. 
+ */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testRollbackNonExistentTransaction() throws Exception { + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + KuduTransaction fakeTxn = makeFakeTransaction(txn); + try { + fakeTxn.rollback(); + fail("rolling back non-existing transaction should have failed"); + } catch (NonRecoverableException e) { + final String errmsg = e.getMessage(); + final Status status = e.getStatus(); + assertTrue(status.toString(), status.isInvalidArgument()); + assertTrue(errmsg, errmsg.matches(".*transaction ID .* not found.*")); + } catch (Exception e) { + fail("unexpected exception: " + e.toString()); + } + } + + /** + * Test scenario that starts a new transaction given an instance of + * AsyncKuduClient. The purpose of this test is to make sure it's possible + * to start a new transaction given an AsyncKuduClient object. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testNewTransactionAsyncClient() throws Exception { + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + byte[] buf = txn.serialize(); + final TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasTxnId()); + assertTrue(pb.getTxnId() > AsyncKuduClient.INVALID_TXN_ID); + assertTrue(pb.hasEnableKeepalive()); + // By default, keepalive is disabled for a serialized txn token. + assertFalse(pb.getEnableKeepalive()); + assertTrue(pb.hasKeepaliveMillis()); + assertTrue(pb.getKeepaliveMillis() > 0); + } + + /** + * Test scenario that starts a transaction and creates a new transactional + * KuduSession based on the newly started transaction. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testNewTransactionalSession() throws Exception { + final String TABLE_NAME = "new_transactional_session"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + KuduSession session = txn.newKuduSession(); + assertNotNull(session); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + KuduTable table = client.openTable(TABLE_NAME); + Insert insert = createBasicSchemaInsert(table, 1); + session.apply(insert); + session.flush(); + + // Rollback the transaction. + txn.rollback(); + + assertFalse(session.isClosed()); + assertEquals(0, session.countPendingErrors()); + + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + assertEquals(0, countRowsInScan(scanner)); + } + + /** + * Test scenario that starts a transaction and creates a new transactional + * AsyncKuduSession based on the newly started transaction. No rows are + * inserted: it should be possible to rollback the empty transaction with + * no errors reported. 
+ */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testNewAsyncTransactionalSession() throws Exception { + KuduTransaction txn = client.newTransaction(); + assertNotNull(txn); + AsyncKuduSession session = txn.newAsyncKuduSession(); + assertNotNull(session); + + // Rollback the empty transaction. + txn.rollback(); + + assertFalse(session.isClosed()); + assertEquals(0, session.countPendingErrors()); + } + + /** + * Try to start a transaction when the backend doesn't have the required + * functionality (e.g. a backend which predates the introduction of the + * txn-related functionality). + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled=false", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testTxnOpsWithoutTxnManager() throws Exception { + try (KuduTransaction txn = client.newTransaction()) { + fail("starting a new transaction without TxnManager should have failed"); + } catch (KuduException e) { + final String errmsg = e.getMessage(); + final Status status = e.getStatus(); + assertTrue(status.toString(), status.isRemoteError()); + assertTrue(errmsg, errmsg.matches(".* Not found: .*")); + assertTrue(errmsg, errmsg.matches( + ".* kudu.transactions.TxnManagerService not registered on Master")); + } catch (Exception e) { + fail("unexpected exception: " + e.toString()); + } + } + + /** + * Test KuduTransaction to be used in auto-closable manner. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--txn_schedule_background_tasks=false", + "--enable_txn_system_client_init=true", + }) + public void testAutoclosableUsage() throws Exception { + byte[] buf = null; + + try (KuduTransaction txn = client.newTransaction()) { + buf = txn.serialize(); + assertNotNull(buf); + txn.startCommit(); + txn.isCommitComplete(); + } catch (Exception e) { + fail("unexpected exception: " + e.toString()); + } + + try (KuduTransaction txn = KuduTransaction.deserialize(buf, asyncClient)) { + buf = txn.serialize(); + assertNotNull(buf); + txn.rollback(); + } catch (Exception e) { + fail("unexpected exception: " + e.toString()); + } + + // Do this once more time, just in case to verify that handles created by + // the serialize/deserialize sequence behave as expected. + try (KuduTransaction txn = KuduTransaction.deserialize(buf, asyncClient)) { + buf = txn.serialize(); + assertNotNull(buf); + txn.rollback(); + } catch (Exception e) { + fail("unexpected exception: " + e.toString()); + } + + { + KuduTransaction txn = client.newTransaction(); + // Explicitly call KuduTransaction.close() more than once time to make + // sure it's possible to do so and the method's behavior is idempotent. + txn.close(); + txn.close(); + } + } + + /** + * Verify that a transaction token created by the KuduClient.serialize() + * method has keepalive enabled or disabled as specified by the + * SerializationOptions. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testSerializationOptions() throws Exception { + final KuduTransaction txn = client.newTransaction(); + + // Check the keepalive settings when serializing/deserializing with default + // settings for SerializationOptions. 
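+ // With default SerializationOptions the token still carries the keepalive
+ // interval, but keepalive itself is disabled, as the assertions below verify.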
+ { + byte[] buf = txn.serialize(); + TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasKeepaliveMillis()); + assertTrue(pb.getKeepaliveMillis() > 0); + assertTrue(pb.hasEnableKeepalive()); + assertFalse(pb.getEnableKeepalive()); + } + + // Same as above, but supply an instance of SerializationOptions with + // default settings created by the constructor. + { + KuduTransaction.SerializationOptions options = + new KuduTransaction.SerializationOptions(); + byte[] buf = txn.serialize(options); + TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasKeepaliveMillis()); + assertTrue(pb.getKeepaliveMillis() > 0); + assertTrue(pb.hasEnableKeepalive()); + assertFalse(pb.getEnableKeepalive()); + } + + // Same as above, but explicitly disable keepalive for an instance of + // SerializationOptions. + { + KuduTransaction.SerializationOptions options = + new KuduTransaction.SerializationOptions(); + options.setEnableKeepalive(false); + byte[] buf = txn.serialize(options); + TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasKeepaliveMillis()); + assertTrue(pb.getKeepaliveMillis() > 0); + assertTrue(pb.hasEnableKeepalive()); + assertFalse(pb.getEnableKeepalive()); + } + + // Explicitly enable keepalive with SerializationOptions. + { + KuduTransaction.SerializationOptions options = + new KuduTransaction.SerializationOptions(); + options.setEnableKeepalive(true); + byte[] buf = txn.serialize(options); + TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasKeepaliveMillis()); + assertTrue(pb.getKeepaliveMillis() > 0); + assertTrue(pb.hasEnableKeepalive()); + assertTrue(pb.getEnableKeepalive()); + } + } + + /** + * Test that a KuduTransaction handle created by KuduClient.newTransaction() + * automatically sends keepalive messages. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--txn_keepalive_interval_ms=200", + "--txn_staleness_tracker_interval_ms=50", + "--enable_txn_system_client_init=true", + }) + public void testKeepaliveBasic() throws Exception { + try (KuduTransaction txn = client.newTransaction()) { + final byte[] buf = txn.serialize(); + final TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasKeepaliveMillis()); + final long keepaliveMillis = pb.getKeepaliveMillis(); + assertTrue(keepaliveMillis > 0); + Thread.sleep(3 * keepaliveMillis); + // It should be possible to commit the transaction since it supposed to be + // open at this point even after multiples of the inactivity timeout + // interval. + txn.startCommit(); + } catch (Exception e) { + fail("unexpected exception: " + e.toString()); + } + + { + KuduTransaction txn = client.newTransaction(); + final byte[] buf = txn.serialize(); + final TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasKeepaliveMillis()); + final long keepaliveMillis = pb.getKeepaliveMillis(); + assertTrue(keepaliveMillis > 0); + // Call KuduTransaction.close() explicitly. + txn.close(); + + // Keep the handle around without any activity for longer than the + // keepalive timeout interval. + Thread.sleep(3 * keepaliveMillis); + + // At this point, the underlying transaction should be automatically + // aborted by the backend. 
An attempt to commit the transaction should + // fail because the transaction is assumed to be already aborted at this + // point. + NonRecoverableException ex = assertThrows( + NonRecoverableException.class, new ThrowingRunnable() { + @Override + public void run() throws Throwable { + txn.startCommit(); + } + }); + final String errmsg = ex.getMessage(); + assertTrue(errmsg, errmsg.matches( + ".* transaction ID .* is not open: state: ABORT.*")); + + // Verify that KuduTransaction.rollback() successfully runs on a transaction + // handle if the underlying transaction is already aborted automatically + // by the backend. Rolling back the transaction explicitly should succeed + // since it's a pure no-op: rolling back a transaction has idempotent + // semantics. + txn.rollback(); + } + } + + /** + * Test that a KuduTransaction handle created by KuduClient.deserialize() + * automatically sends or doesn't send keepalive heartbeat messages + * depending on the SerializationOptions used while serializing the handle + * into a transaction token. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--txn_keepalive_interval_ms=200", + "--txn_schedule_background_tasks=false", + "--txn_staleness_tracker_interval_ms=50", + "--enable_txn_system_client_init=true", + }) + public void testKeepaliveForDeserializedHandle() throws Exception { + // Check the keepalive behavior when serializing/deserializing with default + // settings for SerializationOptions. + { + KuduTransaction txn = client.newTransaction(); + final byte[] buf = txn.serialize(); + final TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasKeepaliveMillis()); + final long keepaliveMillis = pb.getKeepaliveMillis(); + assertTrue(keepaliveMillis > 0); + + KuduTransaction serdesTxn = KuduTransaction.deserialize(buf, asyncClient); + + // Call KuduTransaction.close() explicitly to stop sending automatic + // keepalive messages from 'txn' handle. + txn.close(); + + // Keep the handle around without any activity for longer than the + // keepalive timeout interval. + Thread.sleep(3 * keepaliveMillis); + + // At this point, the underlying transaction should be automatically + // aborted by the backend: the 'txn' handle should not send any heartbeats + // anymore since it's closed, and the 'serdesTxn' handle should not be + // sending any heartbeats. + NonRecoverableException ex = assertThrows( + NonRecoverableException.class, new ThrowingRunnable() { + @Override + public void run() throws Throwable { + serdesTxn.startCommit(); + } + }); + final String errmsg = ex.getMessage(); + assertTrue(errmsg, errmsg.matches( + ".* transaction ID .* is not open: state: ABORT.*")); + + // Verify that KuduTransaction.rollback() successfully runs on both + // transaction handles if the underlying transaction is already aborted + // automatically by the backend. + txn.rollback(); + serdesTxn.rollback(); + } + + // Check the keepalive behavior when serializing/deserializing when + // keepalive heartbeating is enabled in SerializationOptions used + // during the serialization of the original transaction handle. 
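+ // In this case the deserialized handle takes over sending keepalive heartbeats,
+ // so the transaction is expected to stay open even after the original handle
+ // has been closed.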
+ { + final KuduTransaction.SerializationOptions options = + new KuduTransaction.SerializationOptions(); + options.setEnableKeepalive(true); + KuduTransaction txn = client.newTransaction(); + final byte[] buf = txn.serialize(options); + final TxnTokenPB pb = TxnTokenPB.parseFrom(CodedInputStream.newInstance(buf)); + assertTrue(pb.hasKeepaliveMillis()); + final long keepaliveMillis = pb.getKeepaliveMillis(); + assertTrue(keepaliveMillis > 0); + + KuduTransaction serdesTxn = KuduTransaction.deserialize(buf, asyncClient); + + // Call KuduTransaction.close() explicitly to stop sending automatic + // keepalive messages by the 'txn' handle. + txn.close(); + + // Keep the handle around without any activity for longer than the + // keepalive timeout interval. + Thread.sleep(3 * keepaliveMillis); + + // At this point, the underlying transaction should be kept open + // because the 'serdesTxn' handle sends keepalive heartbeats even if the + // original handle ceased to send those after calling 'close()' on it. + // As an extra sanity check, call 'startCommit()' and 'isCommitComplete()' + // on both handles to make sure no exception is thrown. + serdesTxn.startCommit(); + serdesTxn.isCommitComplete(); + txn.startCommit(); + txn.isCommitComplete(); + } + } + + /** + * This scenario validates the propagation of the commit timestamp for a + * multi-row transaction when committing the transaction synchronously via + * {@link KuduTransaction#commit()} or calling + * {@link KuduTransaction#isCommitComplete()} once the transaction's commit + * has started to run asynchronously. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + // Inject latency to have a chance spotting the transaction in the + // FINALIZE_IN_PROGRESS state and make KuduTransaction.isCommitComplete() + // to return 'false' at least once before returning 'true'. + "--txn_status_manager_inject_latency_finalize_commit_ms=250", + "--enable_txn_system_client_init=true", + }) + public void testPropagateTxnCommitTimestamp() throws Exception { + final String TABLE_NAME = "propagate_txn_commit_timestamp"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 8)); + + KuduTable table = client.openTable(TABLE_NAME); + + // Make sure the commit timestamp for a transaction is propagated to the + // client upon synchronously committing a transaction. + { + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + // Insert many rows: the goal is to get at least one row inserted into + // every tablet of the hash-partitioned test table, so every tablet would + // be a participant in the transaction, and most likely every tablet + // server would be involved. + for (int key = 0; key < 128; ++key) { + session.apply(createBasicSchemaInsert(table, key)); + } + session.flush(); + assertEquals(0, session.countPendingErrors()); + + final long tsBeforeCommit = client.getLastPropagatedTimestamp(); + txn.commit(); + final long tsAfterCommit = client.getLastPropagatedTimestamp(); + assertTrue(tsAfterCommit > tsBeforeCommit); + } + + // Make sure the commit timestamp for a transaction is propagated to the + // client upon calling KuduTransaction.isCommitComplete(). 
+ { + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + + // Insert many rows: the goal is to get at least one row inserted into + // every tablet of the hash-partitioned test table, so every tablet would + // be a participant in the transaction, and most likely every tablet + // server would be involved. + for (int key = 128; key < 256; ++key) { + session.apply(createBasicSchemaInsert(table, key)); + } + session.flush(); + assertEquals(0, session.countPendingErrors()); + + final long tsBeforeCommit = client.getLastPropagatedTimestamp(); + txn.startCommit(); + assertEquals(tsBeforeCommit, client.getLastPropagatedTimestamp()); + + assertEventuallyTrue("commit should eventually finalize", + new BooleanExpression() { + @Override + public boolean get() throws Exception { + return txn.isCommitComplete(); + } + }, 30000/*timeoutMillis*/); + long tsAfterCommitFinalized = client.getLastPropagatedTimestamp(); + assertTrue(tsAfterCommitFinalized > tsBeforeCommit); + + // A sanity check: calling isCommitComplete() again after the commit phase + // has been finalized doesn't change last propagated timestamp at the + // client side. + for (int i = 0; i < 10; ++i) { + assertTrue(txn.isCommitComplete()); + assertEquals(tsAfterCommitFinalized, client.getLastPropagatedTimestamp()); + Thread.sleep(10); + } + } + + // An empty transaction doesn't have a timestamp, so there is nothing to + // propagate back to client when an empty transaction is committed, so the + // timestamp propagated to the client side should stay unchanged. + { + KuduTransaction txn = client.newTransaction(); + final long tsBeforeCommit = client.getLastPropagatedTimestamp(); + txn.commit(); + + // Just in case, linger a bit after commit has been finalized, checking + // for the timestamp propagated to the client side. + for (int i = 0; i < 10; ++i) { + Thread.sleep(10); + assertEquals(tsBeforeCommit, client.getLastPropagatedTimestamp()); + } + } + } + + /** + * Test to verify that Kudu client is able to switch to TxnManager hosted by + * other kudu-master process when the previously used one isn't available. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + + // Set Raft heartbeat interval short for faster test runtime: speed up + // leader failure detection and new leader election. + "--raft_heartbeat_interval_ms=100", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testSwitchToOtherTxnManager() throws Exception { + final String TABLE_NAME = "txn_manager_ops_fallback"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + + // Start a transaction, then restart every available TxnManager instance + // before attempting any txn-related operation. + { + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + + KuduTable table = client.openTable(TABLE_NAME); + + Insert insert = createBasicSchemaInsert(table, 0); + session.apply(insert); + session.flush(); + + harness.killAllMasterServers(); + harness.startAllMasterServers(); + + // Querying the status of a transaction should be possible, as usual. + // Since the transaction is still open, KuduTransaction.isCommitComplete() + // should throw corresponding exception with Status.IllegalState. 
+ try { + txn.isCommitComplete(); + fail("KuduTransaction.isCommitComplete should have thrown"); + } catch (NonRecoverableException e) { + assertTrue(e.getStatus().toString(), e.getStatus().isIllegalState()); + assertEquals("transaction is still open", e.getMessage()); + } + + harness.killAllMasterServers(); + harness.startAllMasterServers(); + + // It should be possible to commit the transaction. + txn.commit(); + + // An extra sanity check: read back the rows written into the table in the + // context of the transaction. + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .replicaSelection(ReplicaSelection.LEADER_ONLY) + .build(); + + assertEquals(1, scanner.nextRows().getNumRows()); + } + + // Similar to the above, but run KuduTransaction.commit() when only 2 out + // of 3 masters are running while the TxnManager which used to start the + // transaction is no longer around. + { + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + + KuduTable table = client.openTable(TABLE_NAME); + + Insert insert = createBasicSchemaInsert(table, 1); + session.apply(insert); + session.flush(); + + harness.killLeaderMasterServer(); + + // It should be possible to commit the transaction: 2 out of 3 masters are + // running and Raft should be able to establish a leader master. So, + // txn-related operations routed through TxnManager should succeed. + txn.commit(); + + // An extra sanity check: read back the rows written into the table in the + // context of the transaction. + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .replicaSelection(ReplicaSelection.LEADER_ONLY) + .build(); + + // It's an empty transaction, and 1 row should be there from the prior + // sub-scenario. + assertEquals(1, scanner.nextRows().getNumRows()); + } + } + + /** + * Test to verify that Kudu client is able to switch to TxnManager hosted by + * other kudu-master process when the previously used one isn't available, + * even if txn-related calls first are issued when no TxnManager was running. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + + // Set Raft heartbeat interval short for faster test runtime: speed up + // leader failure detection and new leader election. + "--raft_heartbeat_interval_ms=100", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testSwitchToOtherTxnManagerInFlightCalls() throws Exception { + final String TABLE_NAME = "txn_manager_ops_fallback_inflight"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + + KuduTable table = client.openTable(TABLE_NAME); + + Insert insert = createBasicSchemaInsert(table, 0); + session.apply(insert); + session.flush(); + + harness.killAllMasterServers(); + + Thread t = new Thread(new Runnable() { + @Override + public void run() { + try { + // Sleep for some time to allow the KuduTransaction.commit() call + // below issue RPCs to non-running TxnManangers. 
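+ // The client is expected to keep retrying those RPCs until the masters are
+ // back, so the commit below should eventually succeed rather than fail fast.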
+ Thread.sleep(1000); + harness.startAllMasterServers(); + } catch (Exception e) { + fail("failed to start all masters: " + e); + } + } + }); + t.start(); + + // It should be possible to commit the transaction. + txn.commit(); + + // Just an extra sanity check: the thread should join pretty fast, otherwise + // the call to KuduTransaction.commit() above could not succeed. + t.join(250); + + // An extra sanity check: read back the rows written into the table in the + // context of the transaction. + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .replicaSelection(ReplicaSelection.LEADER_ONLY) + .build(); + + assertEquals(1, countRowsInScan(scanner)); + } + + /** + * Test to verify that Kudu client is able to switch to another TxnManager + * instance when the kudu-master process which hosts currently used TxnManager + * becomes temporarily unavailable (e.g. shut down, restarted, stopped, etc.). + * + * The essence of this scenario is to make sure that Kudu Java client connects + * to a different TxnManager instance and starts sending txn keepalive + * messages there in a timely manner, keeping the transaction alive even if + * the originally used TxnManager instance isn't available. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + + // Set Raft heartbeat interval short for faster test runtime: speed up + // leader failure detection and new leader election. + "--raft_heartbeat_interval_ms=100", + }) + @TabletServerConfig(flags = { + // The txn keepalive interval should be long enough to accommodate Raft + // leader failure detection and election. + "--txn_keepalive_interval_ms=1000", + "--txn_staleness_tracker_interval_ms=250", + "--enable_txn_system_client_init=true", + }) + public void testTxnKeepaliveSwitchesToOtherTxnManager() throws Exception { + final String TABLE_NAME = "txn_manager_fallback"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + + KuduTable table = client.openTable(TABLE_NAME); + + Insert insert = createBasicSchemaInsert(table, 0); + session.apply(insert); + session.flush(); + + harness.killLeaderMasterServer(); + + // Wait for two keepalive intervals to make sure the backend got a chance + // to automatically abort the transaction if not receiving txn keepalive + // messages. + Thread.sleep(2 * 1000); + + // It should be possible to commit the transaction. This is to verify that + // + // * the client eventually starts sending txn keepalive messages to other + // TxnManager instance (the original was hosted by former leader master + // which is no longer available), so the backend doesn't abort the + // transaction automatically due to not receiving keepalive messages + // + // * the client switches to the new TxnManager for other txn-related + // operations as well + txn.commit(); + + // An extra sanity check: read back the rows written into the table in the + // context of the transaction. 
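+ // The READ_YOUR_WRITES mode guarantees the scan sees the rows committed by this
+ // client, so exactly one row should be visible after the commit above.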
+ KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .replicaSelection(ReplicaSelection.LEADER_ONLY) + .build(); + assertEquals(1, countRowsInScan(scanner)); + } + + /** + * Similar to the {@link #testTxnKeepaliveSwitchesToOtherTxnManager()} above, + * but with additional twist of "rolling" unavailability of leader masters. + * In addition, make sure the errors sent from TxnManager are processed + * accordingly when TxnStatusManager is not around. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + + // Set Raft heartbeat interval short for faster test runtime: speed up + // leader failure detection and new leader election. + "--raft_heartbeat_interval_ms=100", + }) + @TabletServerConfig(flags = { + // The txn keepalive interval should be long enough to accommodate Raft + // leader failure detection and election. + "--txn_keepalive_interval_ms=1000", + "--txn_staleness_tracker_interval_ms=250", + "--enable_txn_system_client_init=true", + }) + public void testTxnKeepaliveRollingSwitchToOtherTxnManager() throws Exception { + final String TABLE_NAME = "txn_manager_fallback_rolling"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + + KuduTable table = client.openTable(TABLE_NAME); + + // Cycle the leadership among masters, making sure the client successfully + // switches to every newly elected leader master to send keepalive messages. + final int numMasters = harness.getMasterServers().size(); + for (int i = 0; i < numMasters; ++i) { + // Shutdown the leader master. + final HostAndPort hp = harness.killLeaderMasterServer(); + + // Wait for two keepalive intervals to give the backend a chance + // to automatically abort the transaction if not receiving txn keepalive + // messages. + Thread.sleep(2 * 1000); + + // The transaction should be still alive. + try { + txn.isCommitComplete(); + fail("KuduTransaction.isCommitComplete should have thrown"); + } catch (NonRecoverableException e) { + assertTrue(e.getStatus().toString(), e.getStatus().isIllegalState()); + assertEquals("transaction is still open", e.getMessage()); + } + + // In addition, it should be possible to insert rows in the context + // of the transaction. + session.apply(createBasicSchemaInsert(table, i)); + session.flush(); + + // Start the master back. + harness.startMaster(hp); + } + + // Make sure Java client properly processes error responses sent back by + // TxnManager when the TxnStatusManager isn't available. So, shutdown all + // tablet servers: this is to make sure TxnStatusManager isn't there. + harness.killAllTabletServers(); + + Thread t = new Thread(new Runnable() { + @Override + public void run() { + try { + // Sleep for some time to allow the KuduTransaction.commit() call + // below issue RPCs when TxnStatusManager is not yet around. + Thread.sleep(2 * 1000); + + // Start all the tablet servers back so the TxnStatusManager is back. + harness.startAllTabletServers(); + } catch (Exception e) { + fail("failed to start all tablet servers back: " + e); + } + } + }); + t.start(); + + // The transaction should be still alive, and it should be possible to + // commit it. 
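+    // (For reference, the commit-related calls used throughout these scenarios:
+    //    txn.commit()           -- blocks until the commit is finalized
+    //    txn.startCommit()      -- only initiates the commit and returns
+    //    txn.isCommitComplete() -- polls whether an initiated commit finished
+    //  all three are KuduTransaction methods; this note is just a reader aid.)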
+ txn.commit(); + + t.join(); + + // An extra sanity check: read back the rows written into the table in the + // context of the transaction. + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + assertEquals(numMasters, countRowsInScan(scanner)); + } + + /** + * Make sure {@link KuduTransaction#commit} flushes pending operations + * for all sessions created off the {@link KuduTransaction} handle. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testFlushSessionsOnCommit() throws Exception { + final String TABLE_NAME = "flush_sessions_on_commit"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduTable table = client.openTable(TABLE_NAME); + int key = 0; + + // Regardless of the flush mode, a transactional session is automatically + // flushed when the transaction is committed. + { + final SessionConfiguration.FlushMode[] kFlushModes = { + SessionConfiguration.FlushMode.MANUAL_FLUSH, + SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND, + SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC, + }; + + for (SessionConfiguration.FlushMode mode : kFlushModes) { + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + session.setFlushMode(mode); + Insert insert = createBasicSchemaInsert(table, key++); + session.apply(insert); + + if (mode == SessionConfiguration.FlushMode.MANUAL_FLUSH) { + assertTrue(session.hasPendingOperations()); + } + + txn.commit(); + + assertFalse(session.hasPendingOperations()); + assertEquals(0, session.getPendingErrors().getRowErrors().length); + } + + // Make sure all the applied rows have been persisted. + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + assertEquals(key, countRowsInScan(scanner)); + } + + // Make sure that all the transactional sessions are flushed upon committing + // a transaction. + { + KuduTransaction txn = client.newTransaction(); + List sessions = new ArrayList<>(10); + for (int i = 0; i < 10; ++i) { + KuduSession s = txn.newKuduSession(); + s.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + Insert insert = createBasicSchemaInsert(table, key++); + s.apply(insert); + assertTrue(s.hasPendingOperations()); + sessions.add(s); + } + + txn.commit(); + + for (KuduSession session : sessions) { + assertFalse(session.hasPendingOperations()); + assertEquals(0, session.getPendingErrors().getRowErrors().length); + } + + // Make sure all the applied rows have been persisted. + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + assertEquals(key, countRowsInScan(scanner)); + } + + // Closing and flushing transactional sessions explicitly prior to commit + // is totally fine as well. 
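+    // (KuduSession.close() flushes pending operations, so the row applied on
+    // the closed session below is presumably persisted even though flush()
+    // isn't called on it explicitly.)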
+ { + KuduTransaction txn = client.newTransaction(); + { + KuduSession s = txn.newKuduSession(); + s.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + Insert insert = createBasicSchemaInsert(table, key++); + s.apply(insert); + s.close(); + } + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + Insert insert = createBasicSchemaInsert(table, key++); + session.apply(insert); + session.flush(); + + txn.commit(); + + assertFalse(session.hasPendingOperations()); + assertEquals(0, session.getPendingErrors().getRowErrors().length); + + // Make sure all the applied rows have been persisted. + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + assertEquals(key, countRowsInScan(scanner)); + } + } + + /** + * Make sure it's possible to recover from an error occurred while flushing + * a transactional session: a transaction handle stays valid and it's possible + * to retry calling {@link KuduTransaction#commit()} after handling session + * flush errors. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testRetryCommitAfterSessionFlushError() throws Exception { + final String TABLE_NAME = "retry_commit_after_session_flush_error"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduTable table = client.openTable(TABLE_NAME); + int key = 0; + + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + { + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + } + // Try to insert a row with a duplicate key. + { + Insert insert = createBasicSchemaInsert(table, key++); + session.apply(insert); + } + + try { + txn.commit(); + fail("committing a transaction with duplicate row should have failed"); + } catch (NonRecoverableException e) { + final String errmsg = e.getMessage(); + final Status status = e.getStatus(); + assertTrue(status.toString(), status.isIncomplete()); + assertTrue(errmsg, errmsg.matches( + "failed to flush a transactional session: .*")); + } + + // Insert one more row using the same session. + { + Insert insert = createBasicSchemaInsert(table, key++); + session.apply(insert); + } + + // Now, retry committing the transaction. + txn.commit(); + + assertEquals(0, session.getPendingErrors().getRowErrors().length); + + // Make sure all the applied rows have been persisted. + KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + assertEquals(key, countRowsInScan(scanner)); + } + + /** + * Make sure {@link KuduTransaction#startCommit} succeeds when called on + * a transaction handle which has all of its transactional sessions flushed. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. 
+ "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testStartCommitWithFlushedSessions() throws Exception { + final String TABLE_NAME = "start_commit_with_flushed_sessions"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduTable table = client.openTable(TABLE_NAME); + int key = 0; + + KuduTransaction txn = client.newTransaction(); + { + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + Insert insert = createBasicSchemaInsert(table, key++); + session.apply(insert); + assertFalse(session.hasPendingOperations()); + assertEquals(0, session.getPendingErrors().getRowErrors().length); + } + + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + assertTrue(session.hasPendingOperations()); + session.flush(); + assertFalse(session.hasPendingOperations()); + + // KuduTransaction.startCommit() should succeed now. + txn.startCommit(); + } + + /** + * Check the behavior of {@link KuduTransaction#startCommit} when there are + * non-flushed transactional sessions started off a transaction handle. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testStartCommitWithNonFlushedSessions() throws Exception { + final String TABLE_NAME = "non_flushed_sessions_on_start_commit"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduTable table = client.openTable(TABLE_NAME); + int key = 0; + + KuduTransaction txn = client.newTransaction(); + + // Create one session which will have no pending operations upon + // startCommit() + { + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + Insert insert = createBasicSchemaInsert(table, key++); + session.apply(insert); + assertFalse(session.hasPendingOperations()); + assertEquals(0, session.getPendingErrors().getRowErrors().length); + } + + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + assertTrue(session.hasPendingOperations()); + + try { + txn.startCommit(); + fail("startCommit() should have failed when operations are pending"); + } catch (NonRecoverableException e) { + final String errmsg = e.getMessage(); + final Status status = e.getStatus(); + assertTrue(status.toString(), status.isIllegalState()); + assertTrue(errmsg, errmsg.matches( + ".* at least one transactional session has write operations pending")); + } + + assertTrue(session.hasPendingOperations()); + assertEquals(0, session.getPendingErrors().getRowErrors().length); + } + + /** + * Verify the behavior of {@link KuduTransaction#newAsyncKuduSession} when the + * commit process has already been started for the corresponding transaction. + * This automatically verifies the behavior of + * {@link KuduTransaction#newKuduSession} because it works via + * {@link KuduTransaction#newAsyncKuduSession}. 
+ */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testNewSessionAfterCommit() throws Exception { + final String TABLE_NAME = "new_session_after_commit"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduTable table = client.openTable(TABLE_NAME); + int key = 0; + + { + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + { + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + } + // Try to insert a row with a duplicate key. + { + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + } + try { + txn.commit(); + fail("committing a transaction with duplicate row should have failed"); + } catch (NonRecoverableException e) { + final String errmsg = e.getMessage(); + final Status status = e.getStatus(); + assertTrue(status.toString(), status.isIncomplete()); + assertTrue(errmsg, errmsg.matches( + "failed to flush a transactional session: .*")); + } + + try { + txn.newAsyncKuduSession(); + fail("newKuduSession() should throw when transaction already committed"); + } catch (IllegalStateException e) { + final String errmsg = e.getMessage(); + assertTrue(errmsg, errmsg.matches("commit already started")); + } + txn.rollback(); + } + + { + KuduTransaction txn = client.newTransaction(); + txn.commit(); + try { + txn.newAsyncKuduSession(); + fail("newKuduSession() should throw when transaction already committed"); + } catch (IllegalStateException e) { + final String errmsg = e.getMessage(); + assertTrue(errmsg, errmsg.matches( + "transaction is not open for this handle")); + } + } + } + + /** + * This scenario is similar to the scenario above, but it calls + * {@link KuduTransaction#startCommit} instead of + * {@link KuduTransaction#commit}. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. + "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testCreateSessionAfterStartCommit() throws Exception { + KuduTransaction txn = client.newTransaction(); + txn.startCommit(); + try { + txn.newAsyncKuduSession(); + fail("newKuduSession() should throw when transaction already committed"); + } catch (IllegalStateException e) { + final String errmsg = e.getMessage(); + assertTrue(errmsg, errmsg.matches( + "transaction is not open for this handle")); + } + } + + /** + * A test scenario to verify the behavior of the client API when a write + * operation submitted into a transaction session after the transaction + * has already been committed. + */ + @Test(timeout = 100000) + @MasterServerConfig(flags = { + // TxnManager functionality is necessary for this scenario. 
+ "--txn_manager_enabled", + }) + @TabletServerConfig(flags = { + "--enable_txn_system_client_init=true", + }) + public void testSubmitWriteOpAfterCommit() throws Exception { + final String TABLE_NAME = "submit_write_op_after_commit"; + client.createTable( + TABLE_NAME, + ClientTestUtil.getBasicSchema(), + new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2)); + KuduTable table = client.openTable(TABLE_NAME); + int key = 0; + + KuduTransaction txn = client.newTransaction(); + KuduSession session = txn.newKuduSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + { + Insert insert = createBasicSchemaInsert(table, key++); + session.apply(insert); + } + + txn.commit(); + + { + Insert insert = createBasicSchemaInsert(table, key); + session.apply(insert); + } + List results = session.flush(); + assertEquals(1, results.size()); + OperationResponse rowResult = results.get(0); + assertTrue(rowResult.hasRowError()); + String errmsg = rowResult.getRowError().toString(); + assertTrue(errmsg, errmsg.matches( + ".* transaction ID .* not open: COMMITTED .*")); + } + + // TODO(aserbin): when test harness allows for sending Kudu servers particular + // signals, add a test scenario to verify that timeout for + // TxnManager request is set low enough to detect 'frozen' + // TxnManager instance (e.g., sent SIGSTOP signal), and is able + // to switch to another TxnManager to send txn keepalive + // requests fast enough to keep the transaction alive. +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestLeaderFailover.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestLeaderFailover.java new file mode 100644 index 0000000000..656a6607df --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestLeaderFailover.java @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.KuduTestHarness; + +public class TestLeaderFailover { + + private static final String TABLE_NAME = + TestLeaderFailover.class.getName() + "-" + System.currentTimeMillis(); + private KuduTable table; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() throws Exception { + CreateTableOptions builder = getBasicCreateTableOptions(); + harness.getClient().createTable(TABLE_NAME, getBasicSchema(), builder); + table = harness.getClient().openTable(TABLE_NAME); + } + + /** + * This test writes 3 rows, kills the leader, then tries to write another 3 rows. Finally it + * counts to make sure we have 6 of them. + * + * This test won't run if we didn't start the cluster. + */ + @Test(timeout = 100000) + public void testFailover() throws Exception { + KuduSession session = harness.getClient().newSession(); + for (int i = 0; i < 3; i++) { + session.apply(createBasicSchemaInsert(table, i)); + } + + // Make sure the rows are in there before messing things up. + AsyncKuduScanner scanner = harness.getAsyncClient().newScannerBuilder(table).build(); + assertEquals(3, countRowsInScan(scanner)); + + harness.killTabletLeader(table); + + for (int i = 3; i < 6; i++) { + OperationResponse resp = session.apply(createBasicSchemaInsert(table, i)); + if (resp.hasRowError()) { + fail("Encountered a row error " + resp.getRowError()); + } + } + + scanner = harness.getAsyncClient().newScannerBuilder(table).build(); + assertEquals(6, countRowsInScan(scanner)); + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMasterFailover.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMasterFailover.java new file mode 100644 index 0000000000..2702e50e8d --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMasterFailover.java @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.junit.Assert.assertEquals; + +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.KuduTestHarness; + +/** + * Tests {@link AsyncKuduClient} with multiple masters. + */ +public class TestMasterFailover { + enum KillBefore { + CREATE_CLIENT, + CREATE_TABLE, + OPEN_TABLE, + SCAN_TABLE + } + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Test(timeout = 30000) + public void testKillLeaderBeforeCreateClient() throws Exception { + doTestKillLeader(KillBefore.CREATE_CLIENT); + } + + @Test(timeout = 30000) + public void testKillLeaderBeforeCreateTable() throws Exception { + doTestKillLeader(KillBefore.CREATE_TABLE); + } + + @Test(timeout = 30000) + public void testKillLeaderBeforeOpenTable() throws Exception { + doTestKillLeader(KillBefore.OPEN_TABLE); + } + + @Test(timeout = 30000) + public void testKillLeaderBeforeScanTable() throws Exception { + doTestKillLeader(KillBefore.SCAN_TABLE); + } + + private void doTestKillLeader(KillBefore killBefore) throws Exception { + String tableName = "TestMasterFailover-killBefore=" + killBefore; + int countMasters = harness.getMasterServers().size(); + if (countMasters < 3) { + throw new Exception("This test requires at least 3 master servers, but only " + + countMasters + " are specified."); + } + + if (killBefore == KillBefore.CREATE_CLIENT) { + harness.killLeaderMasterServer(); + } + try (KuduClient c = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build()) { + if (killBefore == KillBefore.CREATE_TABLE) { + harness.killLeaderMasterServer(); + } + + harness.getClient().createTable(tableName, getBasicSchema(), getBasicCreateTableOptions()); + + if (killBefore == KillBefore.OPEN_TABLE) { + harness.killLeaderMasterServer(); + } + + // Test that we can open a previously created table after killing the leader master. + KuduTable table = harness.getClient().openTable(tableName); + + if (killBefore == KillBefore.SCAN_TABLE) { + harness.killLeaderMasterServer(); + } + assertEquals(0, + countRowsInScan(harness.getAsyncClient().newScannerBuilder(table).build())); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMultiMasterAuthzTokens.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMultiMasterAuthzTokens.java new file mode 100644 index 0000000000..edcc73e779 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMultiMasterAuthzTokens.java @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.client.SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND; +import static org.apache.kudu.client.SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC; +import static org.apache.kudu.test.ClientTestUtil.countRowsInTable; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaUpsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.cluster.MiniKuduCluster; + +public class TestMultiMasterAuthzTokens { + private static final MiniKuduCluster.MiniKuduClusterBuilder clusterBuilder = + KuduTestHarness.getBaseClusterBuilder() + .addMasterServerFlag("--authz_token_validity_seconds=1") + .addTabletServerFlag("--tserver_enforce_access_control=true") + // Inject invalid tokens such that operations will be forced to go + // back to the master for an authz token. + .addTabletServerFlag("--tserver_inject_invalid_authz_token_ratio=0.5"); + + private static final String tableName = "TestMultiMasterAuthzToken-table"; + + private KuduClient client; + + @Before + public void setUp() { + client = harness.getClient(); + } + + @Rule + public KuduTestHarness harness = new KuduTestHarness(clusterBuilder); + + /** + * Utility to send RPCs to add rows given the specified flush mode. + * Inserts rows with keys [startRow, endRow). + */ + private void insertRows(KuduTable table, SessionConfiguration.FlushMode mode, + int startRow, int endRow) throws Exception { + KuduSession session = client.newSession(); + session.setFlushMode(mode); + for (int i = startRow; i < endRow; i++) { + Insert insert = createBasicSchemaInsert(table, i); + session.apply(insert); + } + session.flush(); + } + + /** + * Utility to send RPCs to add rows given the specified flush mode. + * Upserts rows with keys [startRow, endRow). + */ + private void upsertRows(KuduTable table, SessionConfiguration.FlushMode mode, + int startRow, int endRow) throws Exception { + KuduSession session = client.newSession(); + session.setFlushMode(mode); + for (int i = startRow; i < endRow; i++) { + Upsert upsert = createBasicSchemaUpsert(table, i); + session.apply(upsert); + } + session.flush(); + } + + @Test + public void testAuthzTokensDuringElection() throws Exception { + // Test sending various requests that require authorization. + final KuduTable table = client.createTable(tableName, getBasicSchema(), + getBasicCreateTableOptions().setNumReplicas(1)); + + // Restart the masters to trigger an election. + harness.killAllMasterServers(); + harness.startAllMasterServers(); + + final int NUM_REQS = 10; + insertRows(table, AUTO_FLUSH_SYNC, 0, NUM_REQS); + + // Do the same for batches of inserts. + harness.killAllMasterServers(); + harness.startAllMasterServers(); + insertRows(table, AUTO_FLUSH_BACKGROUND, NUM_REQS, 2 * NUM_REQS); + + // And for scans. 
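+    // The scans below need valid authz tokens; with
+    // --tserver_inject_invalid_authz_token_ratio=0.5 (set above) regularly
+    // invalidating them, the client is expected to transparently re-acquire
+    // tokens from the freshly restarted masters.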
+    harness.killAllMasterServers();
+    harness.startAllMasterServers();
+    for (int i = 0; i < NUM_REQS; i++) {
+      assertEquals(2 * NUM_REQS, countRowsInTable(table));
+    }
+  }
+
+  @Test
+  public void testAuthzTokenExpiration() throws Exception {
+    // Test a long-running concurrent workload with different types of requests
+    // being sent, all the while injecting invalid tokens, with a short authz
+    // token expiration time. The threads should reacquire tokens as needed
+    // without surfacing token errors to the client.
+    final int TEST_RUNTIME_MS = 30000;
+    final KuduTable table = client.createTable(tableName, getBasicSchema(),
+        getBasicCreateTableOptions().setNumReplicas(1));
+    final CountDownLatch latch = new CountDownLatch(1);
+    final ExecutorService pool = Executors.newFixedThreadPool(3);
+    List<Future<Exception>> exceptions = new ArrayList<>();
+    exceptions.add(pool.submit(new Callable<Exception>() {
+      @Override
+      public Exception call() throws Exception {
+        try {
+          int batch = 0;
+          while (latch.getCount() > 0) {
+            // Send writes without batching.
+            upsertRows(table, AUTO_FLUSH_SYNC, batch * 10, ++batch * 10);
+          }
+        } catch (Exception e) {
+          return e;
+        }
+        return null;
+      }
+    }));
+    exceptions.add(pool.submit(new Callable<Exception>() {
+      @Override
+      public Exception call() throws Exception {
+        try {
+          int batch = 0;
+          while (latch.getCount() > 0) {
+            // Also send writes with batching.
+            upsertRows(table, AUTO_FLUSH_BACKGROUND, batch * 10, ++batch * 10);
+          }
+        } catch (Exception e) {
+          return e;
+        }
+        return null;
+      }
+    }));
+    exceptions.add(pool.submit(new Callable<Exception>() {
+      @Override
+      public Exception call() throws Exception {
+        try {
+          while (latch.getCount() > 0) {
+            // We can't guarantee a row count, but catch any exceptions.
+            countRowsInTable(table);
+          }
+        } catch (Exception e) {
+          return e;
+        }
+        return null;
+      }
+    }));
+    Thread.sleep(TEST_RUNTIME_MS);
+    latch.countDown();
+    int fails = 0;
+    for (Future<Exception> future : exceptions) {
+      Exception e = future.get();
+      if (e != null) {
+        e.printStackTrace();
+        fails++;
+      }
+    }
+    assertEquals(0, fails);
+  }
+}
diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMultipleLeaderFailover.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMultipleLeaderFailover.java
new file mode 100644
index 0000000000..e2daeef07c
--- /dev/null
+++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestMultipleLeaderFailover.java
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.kudu.client;
+
+import static org.apache.kudu.test.ClientTestUtil.countRowsInScan;
+import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert;
+import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions;
+import static org.apache.kudu.test.ClientTestUtil.getBasicSchema;
+import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP;
+import static org.apache.kudu.test.junit.AssertHelpers.assertEventuallyTrue;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.util.List;
+
+import org.junit.Rule;
+import org.junit.Test;
+
+import org.apache.kudu.test.KuduTestHarness;
+import org.apache.kudu.test.junit.AssertHelpers.BooleanExpression;
+
+public class TestMultipleLeaderFailover {
+
+  @Rule
+  public KuduTestHarness harness = new KuduTestHarness();
+
+  private void waitUntilRowCount(final KuduTable table, final int rowCount, long timeoutMs)
+      throws Exception {
+    assertEventuallyTrue(String.format("Read count should be %s", rowCount),
+        new BooleanExpression() {
+          @Override
+          public boolean get() throws Exception {
+            AsyncKuduScanner scanner = harness.getAsyncClient().newScannerBuilder(table).build();
+            int readCount = countRowsInScan(scanner);
+            return readCount == rowCount;
+          }
+        }, timeoutMs);
+  }
+
+  /**
+   * This test writes 3 rows. Then in a loop, it kills the leader, tries to write
+   * ROWS_PER_ITERATION more rows, and restarts the tablet server it killed,
+   * verifying the row count with a read after each iteration. Finally it counts
+   * the rows to make sure we have TOTAL_ROWS_TO_INSERT of them.
+   */
+  @Test(timeout = 100000)
+  @SuppressWarnings("deprecation")
+  public void testMultipleFailover() throws Exception {
+    KuduTable table;
+    CreateTableOptions builder = getBasicCreateTableOptions();
+    String tableName =
+        TestMultipleLeaderFailover.class.getName() + "-" + System.currentTimeMillis();
+    harness.getClient().createTable(tableName, getBasicSchema(), builder);
+
+    table = harness.getClient().openTable(tableName);
+    KuduSession session = harness.getClient().newSession();
+    final int ROWS_PER_ITERATION = 3;
+    final int NUM_ITERATIONS = 10;
+    final int TOTAL_ROWS_TO_INSERT = ROWS_PER_ITERATION + NUM_ITERATIONS * ROWS_PER_ITERATION;
+
+    for (int i = 0; i < ROWS_PER_ITERATION; i++) {
+      session.apply(createBasicSchemaInsert(table, i));
+    }
+
+    waitUntilRowCount(table, ROWS_PER_ITERATION, DEFAULT_SLEEP);
+
+    int currentRows = ROWS_PER_ITERATION;
+    for (int i = 0; i < NUM_ITERATIONS; i++) {
+      List<LocatedTablet> tablets = table.getTabletsLocations(DEFAULT_SLEEP);
+      assertEquals(1, tablets.size());
+      harness.killTabletLeader(tablets.get(0));
+
+      for (int j = 0; j < ROWS_PER_ITERATION; j++) {
+        OperationResponse resp = session.apply(createBasicSchemaInsert(table, currentRows));
+        if (resp.hasRowError()) {
+          fail("Encountered a row error " + resp.getRowError());
+        }
+        currentRows++;
+      }
+
+      harness.startAllTabletServers();
+      // Read-your-writes hasn't been enabled, so we need to use a helper function to poll.
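+      // (A read-your-writes scan would avoid the polling, e.g. a scanner built
+      // with .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) as in other
+      // tests in this module, but this test keeps the default read mode.)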
+ waitUntilRowCount(table, currentRows, DEFAULT_SLEEP); + + } + waitUntilRowCount(table, TOTAL_ROWS_TO_INSERT, DEFAULT_SLEEP); + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiation.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiation.java new file mode 100644 index 0000000000..89d2728eb6 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiation.java @@ -0,0 +1,308 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static junit.framework.TestCase.assertFalse; +import static junit.framework.TestCase.assertTrue; +import static org.apache.kudu.test.junit.AssertHelpers.assertEventuallyTrue; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import java.io.Closeable; +import java.util.Arrays; + +import com.google.protobuf.ByteString; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.CapturingLogAppender; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.KuduTestHarness.MasterServerConfig; +import org.apache.kudu.test.cluster.FakeDNS; +import org.apache.kudu.test.cluster.MiniKuduCluster.MiniKuduClusterBuilder; +import org.apache.kudu.test.junit.AssertHelpers.BooleanExpression; + +public class TestNegotiation { + private static final MiniKuduClusterBuilder clusterBuilder = + new MiniKuduClusterBuilder() + .numMasterServers(1) + .numTabletServers(0) + .enableKerberos(); + + @Rule + public KuduTestHarness harness = new KuduTestHarness(clusterBuilder); + + /** + * Test that a non-Kerberized client will use SASL PLAIN to connect to a + * Kerberized server which doesn't require authentication. Regression test for + * KUDU-2121. + */ + @Test + @MasterServerConfig(flags = { + "--rpc-authentication=optional", + "--rpc-trace-negotiation", + "--user-acl=*" }) + public void testSaslPlainFallback() throws Exception { + FakeDNS.getInstance().install(); + + CapturingLogAppender cla = new CapturingLogAppender(); + harness.kdestroy(); + harness.resetClients(); + try (Closeable c = cla.attach()) { + assertTrue(harness.getClient().getTablesList().getTablesList().isEmpty()); + } + assertTrue(cla.getAppendedText(), + cla.getAppendedText().contains("Client requested to use mechanism: PLAIN")); + } + + /** + * When JWT is enabled on the server, a client with a valid JWT should be + * able to connect using the provided JSON Web Token to authenticate to Kudu + * servers (Kudu master in this particular case). 
+ * + * In other words, when Kudu client has JWT and trusts the server's TLS + * certificate, the client and the server should negotiate a connection using + * the JSON Web Token provided by the client. + */ + @Test + @MasterServerConfig(flags = { + "--enable-jwt-token-auth", + "--rpc-trace-negotiation", + }) + public void testJwtAuthnWithTrustedCert() throws Exception { + FakeDNS.getInstance().install(); + CapturingLogAppender cla = new CapturingLogAppender(); + + // The test harness might have the client already connected to the test + // cluster. + harness.resetClients(); + KuduClient client = harness.getClient(); + String jwt = harness.createJwtFor("account-id", "kudu", true); + assertNotNull(jwt); + client.jwt(jwt); + + waitForClusterCACert(); + final byte[] caCert = harness.getClusterCACertDer(); + assertNotEquals(0, caCert.length); + client.trustedCertificates(Arrays.asList(ByteString.copyFrom(caCert))); + + try (Closeable c = cla.attach()) { + // A simple call to make sure the client has connected to the cluster. + // Success here assumes that the RPC connection to the Kudu server + // has been successfully negotiated. + assertFalse(client.tableExists("nonexistent")); + } + + // Make sure the parties used JWT authn mechanism to negotiate the connection. + assertTrue(cla.getAppendedText(), + cla.getAppendedText().contains("Negotiated authn=JWT")); + } + + @Test + @MasterServerConfig(flags = { + "--enable-jwt-token-auth", + "--rpc-authentication=required", + "--rpc-negotiation-timeout-ms=1500", + "--rpc-trace-negotiation", + }) + public void testJwtAuthnWithoutTrustedCert() throws Exception { + FakeDNS.getInstance().install(); + CapturingLogAppender cla = new CapturingLogAppender(); + + harness.kdestroy(); + harness.resetClients(); + + // Create a special client with short timeout for RPCs. This is a bit funny, + // but due to the way how ConnectToMaster is implemented in the Java client, + // there isn't a simple way to stop the client to negotiate a connection + // again and again, unless the overall RPC times out. A connection closure + // upon negotiation failure is being interpreted as NetworkError, and that's + // a recoverable exception, so the operation is retried again and again. + // + // For faster test runs, the RPC timeout is set lower than the RPC connection + // negotiation timeout, while the latter is set lower than its default value + // (see the MasterServerConfig for the test). However, to prevent test + // flakiness, it's necessary to have at least connection negotiation attempt + // before the RPC times out. + AsyncKuduClient asyncClient = new AsyncKuduClient.AsyncKuduClientBuilder( + harness.getMasterAddressesAsString()) + .defaultAdminOperationTimeoutMs(1000) + .defaultOperationTimeoutMs(1000) + .build(); + KuduClient client = asyncClient.syncClient(); + + // Provide the client with a valid JWT. + String jwt = harness.createJwtFor("account-id", "kudu", true); + assertNotNull(jwt); + client.jwt(jwt); + + try (Closeable c = cla.attach()) { + // It doesn't matter what method to call here: ConnectToMaster should not + // succeed, so the corresponding RPC won't be invoked anyway. + assertFalse(client.tableExists("nonexistent")); + fail(); + } catch (NonRecoverableException ex) { + // Java client reports SERVICE_UNAVAILABLE in this case. + // + // TODO(aserbin): is this a bug? should it be fixed? 
+ assertTrue(ex.getStatus().isServiceUnavailable()); + } + + // Make sure the parties aren't using JWT authn mechanism to negotiate the + // connection since the client shouldn't be willing to send its JWT to a + // non-authenticated Kudu server. As of now, the parties are using the SASL + // authn mechanism in current implementation, but that's not an invariant + // to enforce, so it's not asserted here. + assertFalse(cla.getAppendedText(), cla.getAppendedText().contains( + "Negotiated authn=JWT")); + assertTrue(cla.getAppendedText(), cla.getAppendedText().contains( + "server requires authentication, but client does not have Kerberos credentials (tgt).")); + assertTrue(cla.getAppendedText(), cla.getAppendedText().contains( + "Authentication tokens were not used because no token is available]")); + } + + /** + * Try to authenticate with a valid JWT by mismatched account/principal name. + * An RPC connection to the server will be established successfully, but + * the client will fail to invoke the ConnectToMaster RPC because of + * NotAuthorized error from the coarse-grain authz subsystem. + */ + @Test + @MasterServerConfig(flags = { + "--enable-jwt-token-auth", + "--rpc-trace-negotiation", + }) + public void testValidJwtButWrongSubject() throws Exception { + FakeDNS.getInstance().install(); + CapturingLogAppender cla = new CapturingLogAppender(); + + // The test harness might have the client already connected to the test + // cluster. + harness.resetClients(); + KuduClient client = harness.getClient(); + String jwt = harness.createJwtFor("account-id", "interloper", true); + assertNotNull(jwt); + client.jwt(jwt); + + waitForClusterCACert(); + final byte[] caCert = harness.getClusterCACertDer(); + assertNotEquals(0, caCert.length); + client.trustedCertificates(Arrays.asList(ByteString.copyFrom(caCert))); + + try (Closeable c = cla.attach()) { + // It doesn't matter what method to call here: ConnectToMaster should not + // succeed, so the corresponding RPC won't be invoked anyway. + client.tableExists("nonexistent"); + fail(); + } catch (NonRecoverableException ex) { + // That's a bit funny, but Java client reports SERVICE_UNAVAILABLE in this + // case when failing to call a remote method due to NotAuthorized error + // code returned by Kudu master. + // + // TODO(aserbin): is this a bug? should it be fixed? + assertTrue(ex.getStatus().isServiceUnavailable()); + assertTrue(ex.getMessage().contains( + "Not authorized: unauthorized access to method: ConnectToMaster")); + } + + // Make sure the parties used JWT authn mechanism to successfully negotiate + // the connection, even if the coarse-grained authz check rejected a remote + // call of one of the API methods. + assertTrue(cla.getAppendedText(), + cla.getAppendedText().contains("Negotiated authn=JWT")); + } + + /** + * Try to authenticate with an invalid JWT. The connection negotiation + * should fail because the server should not be able to verify the invalid JWT + * that the client provided. + */ + @Test + @MasterServerConfig(flags = { + "--enable-jwt-token-auth", + "--rpc-negotiation-timeout-ms=1500", + "--rpc-trace-negotiation", + }) + public void testInvalidJwt() throws Exception { + FakeDNS.getInstance().install(); + CapturingLogAppender cla = new CapturingLogAppender(); + + // Create a special client with short timeout for RPCs. 
This is a bit funny, + // but due to the way how ConnectToMaster is implemented in the Java client, + // there isn't a simple way to stop the client to negotiate a connection + // again and again, unless the overall RPC times out. + // + // For faster test runs, the RPC timeout is set lower than the RPC connection + // negotiation timeout, while the latter is set lower than its default value + // (see the MasterServerConfig for the test). However, to prevent test + // flakiness, it's necessary to have at least connection negotiation attempt + // before the RPC times out. + // + // TODO(aserbin): fix ConnectToMaster and stop negotiation attempts upon receiving NotAuthorized + AsyncKuduClient asyncClient = new AsyncKuduClient.AsyncKuduClientBuilder( + harness.getMasterAddressesAsString()) + .defaultAdminOperationTimeoutMs(1000) + .defaultOperationTimeoutMs(1000) + .build(); + KuduClient client = asyncClient.syncClient(); + + String jwt = harness.createJwtFor("account-id", "kudu", false); + assertNotNull(jwt); + client.jwt(jwt); + + waitForClusterCACert(); + final byte[] caCert = harness.getClusterCACertDer(); + assertNotEquals(0, caCert.length); + client.trustedCertificates(Arrays.asList(ByteString.copyFrom(caCert))); + + try (Closeable c = cla.attach()) { + // It doesn't matter what method to call here: ConnectToMaster should not + // succeed, so the corresponding RPC won't be invoked anyway. + client.tableExists("nonexistent"); + fail(); + } catch (NonRecoverableException ex) { + assertTrue(ex.getStatus().isTimedOut()); + } + + assertTrue(cla.getAppendedText(),cla.getAppendedText().contains( + "Negotiated authn=JWT")); + assertTrue(cla.getAppendedText(), cla.getAppendedText().contains( + "Negotiation complete: Not authorized: Server connection negotiation failed")); + assertTrue(cla.getAppendedText(), cla.getAppendedText().contains( + "FATAL_INVALID_JWT: Not authorized: JWT verification failed: failed to verify signature")); + assertTrue(cla.getAppendedText(), cla.getAppendedText().contains( + "Unable to connect to master")); + assertTrue(cla.getAppendedText(), cla.getAppendedText().contains( + "connection closed")); + } + + private void waitForClusterCACert() throws Exception { + // It may take some time for the catalog manager to initialize + // and have IPKI CA certificate ready. + assertEventuallyTrue( + "valid cluster IPKI CA certificate captured", + new BooleanExpression() { + @Override + public boolean get() throws Exception { + return harness.getClusterCACertDer().length != 0; + } + }, + 10000/*timeoutMillis*/); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiationTLSv13.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiationTLSv13.java new file mode 100644 index 0000000000..a822a0c5e1 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiationTLSv13.java @@ -0,0 +1,210 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.kudu.test.ClientTestUtil.createDefaultTable; +import static org.apache.kudu.test.ClientTestUtil.loadDefaultTable; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; +import static org.junit.Assume.assumeTrue; + +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.Security; +import java.util.List; +import java.util.Set; +import javax.net.ssl.SSLContext; +import javax.net.ssl.SSLEngine; + +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.google.common.io.CharStreams; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.KuduTestHarness.MasterServerConfig; +import org.apache.kudu.test.KuduTestHarness.TabletServerConfig; +import org.apache.kudu.test.TempDirUtils; +import org.apache.kudu.test.cluster.KuduBinaryLocator; +import org.apache.kudu.test.cluster.MiniKuduCluster; +import org.apache.kudu.test.cluster.MiniKuduCluster.MiniKuduClusterBuilder; + +// This is a class for Kudu RPC connection negotiation test scenarios targeting +// TLSv1.3. See TestNegotiator for pre-TLSv1.3 test scenarios. +public class TestNegotiationTLSv13 { + + static final String[] TLS13_CIPHERS = new String[]{ + "TLS_AES_128_GCM_SHA256", + "TLS_AES_256_GCM_SHA384", + "TLS_CHACHA20_POLY1305_SHA256", + }; + + private static final Logger LOG = LoggerFactory.getLogger(TestNegotiation.class); + private static final String TABLE_NAME = "tls_v_1_3_test_table"; + private static final int NUM_ROWS = 10; + + private final MiniKuduClusterBuilder clusterBuilder; + + @Rule + public KuduTestHarness harness; + + // Whether TLSv1.3 supported by both server and client side. + private boolean isTLSv13Supported = false; + + // Check if TLSv1.3 is supported by the JVM. + private static boolean isTLSv13SupportedByJVM() { + // It seems some policy-related globals are initialized due to the + // SSLContext.getInstance("TLSv1.3") call below, so server certificates + // signed by 768-bit RSA keys aren't accepted later on when running test + // scenarios due to default security policies. To work around that, override + // the default security constraints the same way it's done + // in the MiniKuduCluster's constructor. 
+ Security.setProperty("jdk.certpath.disabledAlgorithms", "MD2, RC4, MD5"); + Security.setProperty("jdk.tls.disabledAlgorithms", "SSLv3, RC4, MD5"); + try { + SSLContext ctx = SSLContext.getInstance("TLS"); + ctx.init(null, null, null); + SSLEngine engine = ctx.createSSLEngine(); + engine.setUseClientMode(true); + { + Set supported = Sets.newHashSet(engine.getSupportedCipherSuites()); + List common = Lists.newArrayList(); + for (String c : TLS13_CIPHERS) { + if (supported.contains(c)) { + common.add(c); + } + } + if (common.isEmpty()) { + LOG.info("client side doesn't support TLSv1.3: no common ciphers"); + return false; + } + } + { + String[] enabled = engine.getEnabledProtocols(); + LOG.debug("enabled TLS protocols: {}", Joiner.on(' ').join(enabled)); + Set supported = Sets.newHashSet(engine.getSupportedProtocols()); + LOG.debug("supported TLS protocols: {}", Joiner.on(' ').join(supported)); + if (!supported.contains("TLSv1.3")) { + LOG.info("client side doesn't support TLSv1.3: unsupported protocol"); + return false; + } + } + } catch (KeyManagementException | NoSuchAlgorithmException e) { + LOG.info("client side doesn't support TLSv1.3", e); + return false; + } + return true; + } + + // Check if TLSv1.3 is supported by the Kudu server side. + private static boolean isTLSv13SupportedByServerSide() { + // Try to start kudu-master requiring TLSv1.3. It will fail to start if + // TLSv1.3 isn't supported either by the node's OpenSSL library or + // by the build environment where the kudu-master binary was built. + MiniKuduClusterBuilder b = new MiniKuduClusterBuilder() + .numMasterServers(1) + .numTabletServers(0) + .addMasterServerFlag("--time_source=system_unsync") + .addMasterServerFlag("--rpc_tls_min_protocol=TLSv1.3"); + try (MiniKuduCluster c = b.build()) { + try { + // A sanity check: make sure the started processes haven't crashed. + // MiniKuduCluster does neither detect nor report properly on such + // events otherwise. + c.killAllMasterServers(); + } catch (IOException e) { + LOG.error("unexpected exception:", e); + fail("kudu-master didn't actually start"); + return false; // unreachable + } + } catch (IOException e) { + LOG.info("server side doesn't support TLSv1.3", e); + return false; + } + return true; + } + + public TestNegotiationTLSv13() { + clusterBuilder = new MiniKuduClusterBuilder() + .numMasterServers(1) + .numTabletServers(3) + .enableKerberos(); + + isTLSv13Supported = isTLSv13SupportedByJVM() && isTLSv13SupportedByServerSide(); + if (isTLSv13Supported) { + // By the virtue of excluding all other protocols but TLSv1.3 + // from the list of available TLS protocols at the server side, + // client and server will use TLSv1.3 to negotiate a connection. + clusterBuilder.addMasterServerFlag("--rpc_tls_min_protocol=TLSv1.3"); + clusterBuilder.addTabletServerFlag("--rpc_tls_min_protocol=TLSv1.3"); + } + + harness = new KuduTestHarness(clusterBuilder); + } + + /** + * Make sure that Kudu Java client is able to negotiate RPC connections + * protected by TLSv1.3 with Kudu servers. By the virtue of excluding all + * other protocols but TLSv1.3 from the list of available TLS protocols + * at the server side, this scenario verifies that Kudu Java client is able to + * work with a secure Kudu cluster using TLSv1.3. + * + * Using the JUnit's terminology, this test scenario is conditionally run only + * if both the client and the server sides support TLSv1.3. 
+ */ + @Test + @MasterServerConfig(flags = { + "--rpc-encryption=required", + "--rpc_encrypt_loopback_connections", + "--rpc-trace-negotiation", + }) + @TabletServerConfig(flags = { + "--rpc-encryption=required", + "--rpc_encrypt_loopback_connections", + "--rpc-trace-negotiation", + }) + public void connectionNegotiation() throws Exception { + assumeTrue("TLSv1.3 isn't supported by both sides", isTLSv13Supported); + + // Make sure Java client is able to communicate with Kudu masters and tablet + // servers: create a table and write several rows into the table. + { + KuduClient c = harness.getClient(); + createDefaultTable(c, TABLE_NAME); + loadDefaultTable(c, TABLE_NAME, NUM_ROWS); + } + + // An extra sanity check: on successful negotiation the connection should be + // considered 'private' once it's protected by TLS, so Kudu master must send + // the client an authn token. + { + AsyncKuduClient c = harness.getAsyncClient(); + SecurityContext ctx = c.securityContext; + assertNotNull(ctx.getAuthenticationToken()); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiator.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiator.java new file mode 100644 index 0000000000..57347956b1 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestNegotiator.java @@ -0,0 +1,474 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.security.KeyStore; +import java.security.cert.Certificate; +import java.util.List; +import javax.net.ssl.KeyManagerFactory; +import javax.net.ssl.SSLContext; +import javax.net.ssl.SSLEngine; +import javax.net.ssl.SSLEngineResult; +import javax.net.ssl.SSLEngineResult.HandshakeStatus; +import javax.net.ssl.SSLException; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.ExtensionRegistry; +import com.google.protobuf.Message; +import com.google.protobuf.TextFormat; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.embedded.EmbeddedChannel; +import io.netty.handler.ssl.SslHandler; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.client.Negotiator.Success; +import org.apache.kudu.rpc.RpcHeader; +import org.apache.kudu.rpc.RpcHeader.AuthenticationTypePB; +import org.apache.kudu.rpc.RpcHeader.NegotiatePB; +import org.apache.kudu.rpc.RpcHeader.NegotiatePB.NegotiateStep; +import org.apache.kudu.rpc.RpcHeader.NegotiatePB.SaslMechanism; +import org.apache.kudu.rpc.RpcHeader.ResponseHeader; +import org.apache.kudu.rpc.RpcHeader.RpcFeatureFlag; +import org.apache.kudu.security.Token.SignedTokenPB; +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.util.SecurityUtil; + +public class TestNegotiator { + static final Logger LOG = LoggerFactory.getLogger(TestNegotiator.class); + + private EmbeddedChannel embedder; + private SecurityContext secContext; + private SSLEngine serverEngine; + + private static final char[] KEYSTORE_PASSWORD = "password".toCharArray(); + + // This test handles pre-TLSv1.3 protocols only. See TestNegotiationTLSv13 + // for TLSv1.3-specific test scenarios. + // TODO(aserbin): update corresponding test scenarios to work with TLSv1.3 + static final String[] ENABLED_PROTOCOLS = new String[]{ + "TLSv1.2", + "TLSv1.1", + "TLSv1", + }; + + /** + * The cert stored in the keystore, in base64ed DER format. + * The real certs we'll get from the server will not be in Base64, + * but the CertificateFactory also supports binary DER. 
+ */ + private static final String CA_CERT_DER = + "-----BEGIN CERTIFICATE-----\n" + + "MIIDXTCCAkWgAwIBAgIJAOOmFHYkBz4rMA0GCSqGSIb3DQEBCwUAMEUxCzAJBgNVBAYTAkFVMRMw" + + "EQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwHhcN" + + "MTYxMTAyMjI0OTQ5WhcNMTcwMjEwMjI0OTQ5WjBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29t" + + "ZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIIBIjANBgkqhkiG9w0B" + + "AQEFAAOCAQ8AMIIBCgKCAQEAppo9GwiDisQVYAF9NXl8ykqo0MIi5rfNwiE9kUWbZ2ejzxs+1Cf7" + + "WCn4mzbkJx5ZscRjhnNb6dJxtZJeid/qgiNVBcNzh35H8J+ao0tEbHjCs7rKOX0etsFUp4GQwYkd" + + "fpvVBsU8ciXvkxhvt1XjSU3/YJJRAvCyGVxUQlKiVKGCD4OnFNBwMdNw7qI8ryiRv++7I9udfSuM" + + "713yMeBtkkV7hWUfxrTgQOLsV/CS+TsSoOJ7JJqHozeZ+VYom85UqSfpIFJVzM6S7BTb6SX/vwYI" + + "oS70gubT3HbHgDRcMvpCye1npHL9fL7B87XZn7wnnUem0eeCqWyUjJ82Uj9mQQIDAQABo1AwTjAd" + + "BgNVHQ4EFgQUOY7rpWGoZMrmyRZ9RohPWVwyPBowHwYDVR0jBBgwFoAUOY7rpWGoZMrmyRZ9RohP" + + "WVwyPBowDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEATKh3io8ruqbhmopY3xQWA2pE" + + "hs4ZSu3H+AfULMruVsXKEZjWp27nTsFaxLZYUlzeZr0EcWwZ79qkcA8Dyj+mVHhrCAPpcjsDACh1" + + "ZdUQAgASkVS4VQvkukct3DFa3y0lz5VwQIxjoQR5y6dCvxxXT9NpRo/Z7pd4MRhEbz3NT6PScQ9f" + + "2MTrR0NOikLdB98JlpKQbEKxzbMhWDw4J3mrmK6zdemjdCcRDsBVPswKnyAjkibXaZkpNRzjvDNA" + + "gO88MKlArCYoyRZqIfkcSXAwwTdGQ+5GQLsY9zS49Rrhk9R7eOmDhaHybdRBDqW1JiCSmzURZAxl" + + "nrjox4GmC3JJaA==\n" + + "-----END CERTIFICATE-----"; + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Before + public void setUp() { + serverEngine = createServerEngine(); + serverEngine.setUseClientMode(false); + secContext = new SecurityContext(); + } + + private void startNegotiation(boolean fakeLoopback) { + Negotiator negotiator = new Negotiator("127.0.0.1", secContext, false, "kudu", + false, false, false); + negotiator.overrideLoopbackForTests = fakeLoopback; + embedder = new EmbeddedChannel(negotiator); + negotiator.sendHello(embedder.pipeline().firstContext()); + } + + static CallResponse fakeResponse(ResponseHeader header, Message body) { + ByteBuf buf = Unpooled.buffer(); + KuduRpc.toByteBuf(buf, header, body); + buf = buf.slice(4, buf.readableBytes() - 4); + return new CallResponse(buf); + } + + KeyStore loadTestKeystore() throws Exception { + KeyStore ks = KeyStore.getInstance("JKS"); + try (InputStream stream = + TestNegotiator.class.getResourceAsStream("/test-key-and-cert.jks")) { + ks.load(stream, KEYSTORE_PASSWORD); + } + return ks; + } + + SSLEngine createServerEngine() { + try { + KeyManagerFactory kmf = KeyManagerFactory.getInstance("SunX509"); + kmf.init(loadTestKeystore(), KEYSTORE_PASSWORD); + SSLContext ctx = SSLContext.getInstance("TLS"); + ctx.init(kmf.getKeyManagers(), null, null); + SSLEngine engine = ctx.createSSLEngine(); + engine.setEnabledProtocols(ENABLED_PROTOCOLS); + return engine; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Checks that the client sends a connection context and then yields + * a Negotiation.Success to the pipeline. + * @return the result + */ + private Success assertComplete(boolean isTls) throws Exception { + RpcOutboundMessage msg = isTls ? 
+ unwrapOutboundMessage(embedder.readOutbound(), + RpcHeader.ConnectionContextPB.newBuilder()) : + embedder.readOutbound(); + RpcHeader.ConnectionContextPB connCtx = (RpcHeader.ConnectionContextPB) msg.getBody(); + assertEquals(Negotiator.CONNECTION_CTX_CALL_ID, msg.getHeaderBuilder().getCallId()); + assertEquals(System.getProperty("user.name"), connCtx.getDEPRECATEDUserInfo().getRealUser()); + + // Expect the client to also emit a negotiation Success. + Success success = embedder.readInbound(); + assertNotNull(success); + return success; + } + + @Test + public void testChannelBinding() throws Exception { + KeyStore ks = loadTestKeystore(); + Certificate cert = ks.getCertificate("1"); + byte[] bindings = SecurityUtil.getEndpointChannelBindings(cert); + assertEquals(32, bindings.length); + } + + /** + * Simple test case for a PLAIN negotiation. + */ + @Test + public void testNegotiation() throws Exception { + startNegotiation(false); + + // Expect client->server: NEGOTIATE. + RpcOutboundMessage msg = embedder.readOutbound(); + NegotiatePB body = (NegotiatePB) msg.getBody(); + assertEquals(Negotiator.SASL_CALL_ID, msg.getHeaderBuilder().getCallId()); + assertEquals(NegotiateStep.NEGOTIATE, body.getStep()); + + // Respond with NEGOTIATE. + embedder.writeInbound(fakeResponse( + ResponseHeader.newBuilder().setCallId(Negotiator.SASL_CALL_ID).build(), + NegotiatePB.newBuilder() + .addSaslMechanisms(SaslMechanism.newBuilder().setMechanism("PLAIN")) + .setStep(NegotiateStep.NEGOTIATE) + .build())); + embedder.flushInbound(); + + // Expect client->server: SASL_INITIATE (PLAIN) + msg = embedder.readOutbound(); + body = (NegotiatePB) msg.getBody(); + + assertEquals(Negotiator.SASL_CALL_ID, msg.getHeaderBuilder().getCallId()); + assertEquals(NegotiateStep.SASL_INITIATE, body.getStep()); + assertEquals(1, body.getSaslMechanismsCount()); + assertEquals("PLAIN", body.getSaslMechanisms(0).getMechanism()); + assertTrue(body.hasToken()); + + // Respond with SASL_SUCCESS: + embedder.writeInbound(fakeResponse( + ResponseHeader.newBuilder().setCallId(Negotiator.SASL_CALL_ID).build(), + NegotiatePB.newBuilder() + .setStep(NegotiateStep.SASL_SUCCESS) + .build())); + embedder.flushInbound(); + + // Expect client->server: ConnectionContext + assertComplete(/*isTls*/ false); + } + + private static void runTasks(SSLEngineResult result, + SSLEngine engine) { + if (result.getHandshakeStatus() != HandshakeStatus.NEED_TASK) { + return; + } + Runnable task; + while ((task = engine.getDelegatedTask()) != null) { + task.run(); + } + } + + private static CallResponse runServerStep(SSLEngine engine, + ByteString clientTlsMessage) throws SSLException { + LOG.debug("Handling TLS message from client: {}", Bytes.hex(clientTlsMessage.toByteArray())); + ByteBuffer dst = ByteBuffer.allocate(engine.getSession().getPacketBufferSize()); + ByteBuffer src = ByteBuffer.wrap(clientTlsMessage.toByteArray()); + do { + SSLEngineResult result = engine.unwrap(src, dst); + runTasks(result, engine); + } while (engine.getHandshakeStatus() == SSLEngineResult.HandshakeStatus.NEED_UNWRAP); + + if (engine.getHandshakeStatus() == SSLEngineResult.HandshakeStatus.NEED_WRAP) { + // The server has more to send. + // Produce the ServerHello and send it back to the client. 
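+      // Each wrap() call may emit one or more TLS handshake records; collect
+      // them all and return them in a single TLS_HANDSHAKE negotiate response.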
+      List<ByteString> bufs = Lists.newArrayList();
+      while (engine.getHandshakeStatus() == SSLEngineResult.HandshakeStatus.NEED_WRAP) {
+        dst.clear();
+        runTasks(engine.wrap(ByteBuffer.allocate(0), dst), engine);
+        dst.flip();
+        bufs.add(ByteString.copyFrom(dst));
+      }
+      return fakeResponse(
+          ResponseHeader.newBuilder().setCallId(Negotiator.SASL_CALL_ID).build(),
+          NegotiatePB.newBuilder()
+              .setTlsHandshake(ByteString.copyFrom(bufs))
+              .setStep(NegotiateStep.TLS_HANDSHAKE)
+              .build());
+    } else if (engine.getHandshakeStatus() == SSLEngineResult.HandshakeStatus.NOT_HANDSHAKING) {
+      // Handshake complete.
+      return null;
+    } else {
+      throw new AssertionError("unexpected state: " + engine.getHandshakeStatus());
+    }
+  }
+
+  /**
+   * Completes the 3-step TLS handshake, assuming that the client is
+   * about to generate the first of the messages.
+   */
+  private void runTlsHandshake(boolean isAuthOnly) throws SSLException {
+    RpcOutboundMessage msg = embedder.readOutbound();
+    NegotiatePB body = (NegotiatePB) msg.getBody();
+    assertEquals(NegotiateStep.TLS_HANDSHAKE, body.getStep());
+
+    // Consume the ClientHello in our fake server, which should generate ServerHello.
+    embedder.writeInbound(runServerStep(serverEngine, body.getTlsHandshake()));
+    embedder.flushInbound();
+
+    // Expect client to generate ClientKeyExchange, ChangeCipherSpec, Finished.
+    msg = embedder.readOutbound();
+    body = (NegotiatePB) msg.getBody();
+    assertEquals(NegotiateStep.TLS_HANDSHAKE, body.getStep());
+
+    // Now that the handshake is complete, we need to encode RpcOutboundMessages
+    // to ByteBuf to be accepted by the SslHandler.
+    // This encoder is added to the pipeline by the Connection in normal Negotiator usage.
+    if (!isAuthOnly) {
+      embedder.pipeline().addFirst("encode-outbound", new RpcOutboundMessage.Encoder());
+    }
+
+    // Server consumes the above. Should send the TLS "Finished" message.
+    embedder.writeInbound(runServerStep(serverEngine, body.getTlsHandshake()));
+    embedder.flushInbound();
+  }
+
+  @Test
+  public void testTlsNegotiation() throws Exception {
+    startNegotiation(false);
+
+    // Expect client->server: NEGOTIATE, TLS included.
+    RpcOutboundMessage msg = embedder.readOutbound();
+    NegotiatePB body = (NegotiatePB) msg.getBody();
+    assertEquals(NegotiateStep.NEGOTIATE, body.getStep());
+    assertTrue(body.getSupportedFeaturesList().contains(RpcFeatureFlag.TLS));
+
+    // Fake a server response with TLS enabled.
+    embedder.writeInbound(fakeResponse(
+        ResponseHeader.newBuilder().setCallId(Negotiator.SASL_CALL_ID).build(),
+        NegotiatePB.newBuilder()
+            .addSaslMechanisms(NegotiatePB.SaslMechanism.newBuilder().setMechanism("PLAIN"))
+            .addSupportedFeatures(RpcFeatureFlag.TLS)
+            .setStep(NegotiateStep.NEGOTIATE)
+            .build()));
+    embedder.flushInbound();
+
+    // Expect client->server: TLS_HANDSHAKE.
+    runTlsHandshake(/*isAuthOnly*/ false);
+
+    // The pipeline should now have an SSL handler as the first handler.
+    assertTrue(embedder.pipeline().first() instanceof SslHandler);
+
+    // The Negotiator should have sent the SASL_INITIATE at this point.
+    msg = unwrapOutboundMessage(embedder.readOutbound(), RpcHeader.NegotiatePB.newBuilder());
+    body = (NegotiatePB) msg.getBody();
+    assertEquals(NegotiateStep.SASL_INITIATE, body.getStep());
+  }
+
+  @Test
+  public void testTlsNegotiationAuthOnly() throws Exception {
+    startNegotiation(true);
+
+    // Expect client->server: NEGOTIATE, TLS and TLS_AUTHENTICATION_ONLY included.
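+    // (The client advertises TLS_AUTHENTICATION_ONLY here because
+    // startNegotiation(true) marks the connection as loopback for the test;
+    // as asserted below, such a connection uses TLS only to authenticate.)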
+ RpcOutboundMessage msg = embedder.readOutbound(); + NegotiatePB body = (NegotiatePB) msg.getBody(); + assertEquals(NegotiateStep.NEGOTIATE, body.getStep()); + assertTrue(body.getSupportedFeaturesList().contains(RpcFeatureFlag.TLS)); + assertTrue(body.getSupportedFeaturesList().contains( + RpcFeatureFlag.TLS_AUTHENTICATION_ONLY)); + + // Fake a server response with TLS and TLS_AUTHENTICATION_ONLY enabled. + embedder.writeInbound(fakeResponse( + ResponseHeader.newBuilder().setCallId(Negotiator.SASL_CALL_ID).build(), + NegotiatePB.newBuilder() + .addSaslMechanisms(NegotiatePB.SaslMechanism.newBuilder().setMechanism("PLAIN")) + .addSupportedFeatures(RpcFeatureFlag.TLS) + .addSupportedFeatures(RpcFeatureFlag.TLS_AUTHENTICATION_ONLY) + .setStep(NegotiateStep.NEGOTIATE) + .build())); + embedder.flushInbound(); + + // Expect client->server: TLS_HANDSHAKE. + runTlsHandshake(/*isAuthOnly*/ true); + + // The pipeline should *not* have an SSL handler as the first handler, + // since we used TLS for authentication only. + assertFalse(embedder.pipeline().first() instanceof SslHandler); + + // The Negotiator should have sent the SASL_INITIATE at this point. + msg = embedder.readOutbound(); + body = (NegotiatePB) msg.getBody(); + assertEquals(NegotiateStep.SASL_INITIATE, body.getStep()); + } + + /** + * Test that, if we don't have any trusted certs, we don't expose + * token authentication as an option. + */ + @Test + public void testNoTokenAuthWhenNoTrustedCerts() throws Exception { + secContext.setAuthenticationToken(SignedTokenPB.getDefaultInstance()); + startNegotiation(false); + + // Expect client->server: NEGOTIATE, TLS included, Token not included. + RpcOutboundMessage msg = embedder.readOutbound(); + NegotiatePB body = (NegotiatePB) msg.getBody(); + assertEquals("supported_features: APPLICATION_FEATURE_FLAGS " + + "supported_features: TLS " + + "step: NEGOTIATE " + + "authn_types { sasl { } }", TextFormat.shortDebugString(body)); + } + + /** + * Test that, if we have a trusted CA cert, we expose token authentication + * as an option during negotiation, and run it to completion. + */ + @Test + public void testTokenAuthWithTrustedCerts() throws Exception { + secContext.trustCertificates(ImmutableList.of(ByteString.copyFromUtf8(CA_CERT_DER))); + secContext.setAuthenticationToken(SignedTokenPB.getDefaultInstance()); + startNegotiation(false); + + // Expect client->server: NEGOTIATE, TLS included, Token included. + RpcOutboundMessage msg = embedder.readOutbound(); + NegotiatePB body = (NegotiatePB) msg.getBody(); + assertEquals("supported_features: APPLICATION_FEATURE_FLAGS " + + "supported_features: TLS " + + "step: NEGOTIATE " + + "authn_types { sasl { } } " + + "authn_types { token { } }", TextFormat.shortDebugString(body)); + + // Fake a server response with TLS enabled and TOKEN chosen. + embedder.writeInbound(fakeResponse( + ResponseHeader.newBuilder().setCallId(Negotiator.SASL_CALL_ID).build(), + NegotiatePB.newBuilder() + .addSupportedFeatures(RpcFeatureFlag.TLS) + .addAuthnTypes(AuthenticationTypePB.newBuilder().setToken( + AuthenticationTypePB.Token.getDefaultInstance())) + .setStep(NegotiateStep.NEGOTIATE) + .build())); + embedder.flushInbound(); + + // Expect to now run the TLS handshake + runTlsHandshake(/*isAuthOnly*/ false); + + // Expect the client to send the token. 
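+    // (From here on the negotiation messages travel inside the TLS channel,
+    // so the outbound bytes must be unwrapped with the fake server's
+    // SSLEngine before they can be parsed.)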
+ msg = unwrapOutboundMessage(embedder.readOutbound(), RpcHeader.NegotiatePB.newBuilder()); + body = (NegotiatePB) msg.getBody(); + assertEquals("step: TOKEN_EXCHANGE authn_token { }", + TextFormat.shortDebugString(body)); + + // Fake a response indicating success. + embedder.writeInbound(fakeResponse( + ResponseHeader.newBuilder().setCallId(Negotiator.SASL_CALL_ID).build(), + NegotiatePB.newBuilder() + .setStep(NegotiateStep.TOKEN_EXCHANGE) + .build())); + embedder.flushInbound(); + + // TODO (ghenke): For some reason the SslHandler adds extra empty messages here. + // This should be harmless, but it would be good to understand or fix why. + while (((ByteBuf) embedder.outboundMessages().peek()).readableBytes() == 0) { + embedder.readOutbound(); + } + + // Should be complete now. + assertComplete(/*isTls*/ true); + } + + private RpcOutboundMessage unwrapOutboundMessage(ByteBuf wrappedBuf, + Message.Builder requestBuilder) + throws Exception { + // Create an SSL handle to handle unwrapping the ssl message. + SslHandler handler = new SslHandler(serverEngine); + EmbeddedChannel serverSSLChannel = new EmbeddedChannel(handler); + + // Pass the ssl message through the channel with the ssl handler. + serverSSLChannel.writeInbound(wrappedBuf); + serverSSLChannel.flushInbound(); + ByteBuf unwrappedbuf = serverSSLChannel.readInbound(); + + // Read the message size and bytes. + final int size = unwrappedbuf.readInt(); + final byte [] bytes = new byte[size]; + unwrappedbuf.getBytes(unwrappedbuf.readerIndex(), bytes); + + // Parse the message header. + final CodedInputStream in = CodedInputStream.newInstance(bytes); + RpcHeader.RequestHeader.Builder header = RpcHeader.RequestHeader.newBuilder(); + in.readMessage(header, ExtensionRegistry.getEmptyRegistry()); + + // Parse the request message. + in.readMessage(requestBuilder, ExtensionRegistry.getEmptyRegistry()); + + return new RpcOutboundMessage(header, requestBuilder.build()); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestOperation.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestOperation.java new file mode 100644 index 0000000000..5cb2c15a62 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestOperation.java @@ -0,0 +1,502 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.List; + +import com.google.common.collect.ImmutableList; +import com.google.common.primitives.Longs; +import org.junit.Rule; +import org.junit.Test; +import org.mockito.Mockito; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.RowOperations.RowOperationsPB; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.Operation.ChangeType; +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.tserver.Tserver.WriteRequestPBOrBuilder; +import org.apache.kudu.util.CharUtil; +import org.apache.kudu.util.DateUtil; + +/** + * Unit tests for Operation + */ +public class TestOperation { + + @Rule + public RetryRule retryRule = new RetryRule(); + + private Schema createManyStringsSchema() { + ArrayList columns = new ArrayList<>(5); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.STRING).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.STRING).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.STRING).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c3", Type.STRING).nullable(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c4", Type.STRING).nullable(true).build()); + return new Schema(columns); + } + + @Test + public void testSetStrings() { + KuduTable table = Mockito.mock(KuduTable.class); + Mockito.doReturn(createManyStringsSchema()).when(table).getSchema(); + Insert insert = new Insert(table); + PartialRow row = insert.getRow(); + row.addString("c0", "c0_val"); + row.addString("c2", "c2_val"); + row.addString("c1", "c1_val"); + row.addString("c3", "c3_val"); + row.addString("c4", "c4_val"); + + { + WriteRequestPBOrBuilder pb = + Operation.createAndFillWriteRequestPB(ImmutableList.of(insert)); + RowOperationsPB rowOps = pb.getRowOperations(); + assertEquals(6 * 5, rowOps.getIndirectData().size()); + assertEquals("c0_valc1_valc2_valc3_valc4_val", rowOps.getIndirectData().toStringUtf8()); + byte[] rows = rowOps.getRows().toByteArray(); + assertEquals(ChangeType.INSERT.toEncodedByte(), rows[0]); + // The "isset" bitset should have 5 bits set + assertEquals(0x1f, rows[1]); + // The "null" bitset should have no bits set + assertEquals(0, rows[2]); + + // Check the strings. + int offset = 3; + for (long i = 0; i <= 4; i++) { + // The offset into the indirect buffer + assertEquals(6L * i, Bytes.getLong(rows, offset)); + offset += Longs.BYTES; + // The length of the pointed-to string. + assertEquals(6L, Bytes.getLong(rows, offset)); + offset += Longs.BYTES; + } + + // Should have used up whole buffer. + assertEquals(rows.length, offset); + } + + // Setting a field to NULL should add to the null bitmap and remove + // the old value from the indirect buffer. 
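+    // (Each remaining set column still contributes an 8-byte offset and an
+    // 8-byte length entry in the rows buffer; only the indirect data shrinks,
+    // as the assertions below verify.)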
+ row.setNull("c3"); + { + WriteRequestPBOrBuilder pb = + Operation.createAndFillWriteRequestPB(ImmutableList.of(insert)); + RowOperationsPB rowOps = pb.getRowOperations(); + assertEquals(6 * 4, rowOps.getIndirectData().size()); + assertEquals("c0_valc1_valc2_valc4_val", rowOps.getIndirectData().toStringUtf8()); + byte[] rows = rowOps.getRows().toByteArray(); + assertEquals(ChangeType.INSERT.toEncodedByte(), rows[0]); + // The "isset" bitset should have 5 bits set + assertEquals(0x1f, rows[1]); + // The "null" bitset should have 1 bit set for the null column + assertEquals(1 << 3, rows[2]); + + // Check the strings. + int offset = 3; + int indirOffset = 0; + for (int i = 0; i <= 4; i++) { + if (i == 3) { + continue; + } + // The offset into the indirect buffer + assertEquals(indirOffset, Bytes.getLong(rows, offset)); + indirOffset += 6; + offset += Longs.BYTES; + // The length of the pointed-to string. + assertEquals(6, Bytes.getLong(rows, offset)); + offset += Longs.BYTES; + } + // Should have used up whole buffer. + assertEquals(rows.length, offset); + } + } + + private Schema createAllTypesKeySchema() { + ArrayList columns = new ArrayList<>(7); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT8).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT16).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.INT32).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c3", Type.INT64).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c4", Type.UNIXTIME_MICROS).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c5", Type.STRING).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c6", Type.BINARY).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c7", Type.DATE).key(true).build()); + return new Schema(columns); + } + + @Test + public void testRowKeyStringify() { + KuduTable table = Mockito.mock(KuduTable.class); + Mockito.doReturn(createAllTypesKeySchema()).when(table).getSchema(); + Insert insert = new Insert(table); + PartialRow row = insert.getRow(); + row.addByte("c0", (byte) 1); + row.addShort("c1", (short) 2); + row.addInt("c2", 3); + row.addLong("c3", 4); + row.addLong("c4", 5); + row.addString("c5", "c5_val"); + row.addBinary("c6", Bytes.fromString("c6_val")); + row.addDate("c7", DateUtil.epochDaysToSqlDate(0)); + + assertEquals("(int8 c0=1, int16 c1=2, int32 c2=3, int64 c3=4, " + + "unixtime_micros c4=1970-01-01T00:00:00.000005Z, string c5=\"c5_val\", " + + "binary c6=\"c6_val\", date c7=1970-01-01)", + insert.getRow().stringifyRowKey()); + + // Test an incomplete row key. + insert = new Insert(table); + row = insert.getRow(); + row.addByte("c0", (byte) 1); + try { + row.stringifyRowKey(); + fail("Should not be able to stringifyRowKey when not all keys are specified"); + } catch (IllegalStateException ise) { + // Expected. 
+    }
+  }
+
+  @Test
+  public void testEncodeDecodeRangeSimpleTypes() {
+    ArrayList<ColumnSchema> columns = new ArrayList<>(2);
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT64).build());
+    final Schema schema = new Schema(columns);
+
+    final PartialRow lower = schema.newPartialRow();
+    lower.addInt("c0", 0);
+
+    final PartialRow upper = schema.newPartialRow();
+    upper.addInt("c0", 100);
+
+    final Operation.OperationsEncoder enc = new Operation.OperationsEncoder();
+    final RowOperationsPB encoded = enc.encodeLowerAndUpperBounds(
+        lower, upper, RangePartitionBound.INCLUSIVE_BOUND, RangePartitionBound.EXCLUSIVE_BOUND);
+
+    Operation.OperationsDecoder dec = new Operation.OperationsDecoder();
+    List<RangePartition> decoded =
+        dec.decodeRangePartitions(encoded, schema);
+    assertEquals(1, decoded.size());
+    assertEquals(RangePartitionBound.INCLUSIVE_BOUND,
+        decoded.get(0).getLowerBoundType());
+    assertEquals(RangePartitionBound.EXCLUSIVE_BOUND,
+        decoded.get(0).getUpperBoundType());
+    final PartialRow lowerDecoded = decoded.get(0).getLowerBound();
+    final PartialRow upperDecoded = decoded.get(0).getUpperBound();
+
+    assertTrue(lowerDecoded.isSet("c0"));
+    assertEquals(0, lowerDecoded.getInt("c0"));
+    assertFalse(lowerDecoded.isSet("c1"));
+    assertEquals(lower.toString(), lowerDecoded.toString());
+
+    assertTrue(upperDecoded.isSet("c0"));
+    assertEquals(100, upperDecoded.getInt("c0"));
+    assertFalse(upperDecoded.isSet("c1"));
+    assertEquals(upper.toString(), upperDecoded.toString());
+  }
+
+  @Test
+  public void testEncodeDecodeRangeStringTypes() {
+    ArrayList<ColumnSchema> columns = new ArrayList<>(2);
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.STRING).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.STRING).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.VARCHAR)
+        .nullable(true)
+        .typeAttributes(CharUtil.typeAttributes(10))
+        .build());
+    final Schema schema = new Schema(columns);
+
+    final PartialRow lower = schema.newPartialRow();
+    lower.addString("c0", "a");
+
+    final PartialRow upper = schema.newPartialRow();
+    upper.addString("c0", "b");
+
+    final Operation.OperationsEncoder enc = new Operation.OperationsEncoder();
+    final RowOperationsPB encoded = enc.encodeLowerAndUpperBounds(
+        lower, upper, RangePartitionBound.INCLUSIVE_BOUND, RangePartitionBound.EXCLUSIVE_BOUND);
+
+    Operation.OperationsDecoder dec = new Operation.OperationsDecoder();
+    List<RangePartition> decoded = dec.decodeRangePartitions(encoded, schema);
+    assertEquals(1, decoded.size());
+    assertEquals(RangePartitionBound.INCLUSIVE_BOUND,
+        decoded.get(0).getLowerBoundType());
+    assertEquals(RangePartitionBound.EXCLUSIVE_BOUND,
+        decoded.get(0).getUpperBoundType());
+    final PartialRow lowerDecoded = decoded.get(0).getLowerBound();
+    final PartialRow upperDecoded = decoded.get(0).getUpperBound();
+
+    assertTrue(lowerDecoded.isSet("c0"));
+    assertEquals("a", lowerDecoded.getString("c0"));
+    assertFalse(lowerDecoded.isSet("c1"));
+    assertFalse(lowerDecoded.isSet("c2"));
+    assertEquals(lower.toString(), lowerDecoded.toString());
+
+    assertTrue(upperDecoded.isSet("c0"));
+    assertEquals("b", upperDecoded.getString("c0"));
+    assertFalse(upperDecoded.isSet("c1"));
+    assertFalse(upperDecoded.isSet("c2"));
+    assertEquals(upper.toString(), upperDecoded.toString());
+  }
+
+  @Test
+  public void testEncodeDecodeRangeMixedTypes() {
+    ArrayList<ColumnSchema> columns = new ArrayList<>(2);
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c0i", Type.INT32).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c1s", Type.STRING).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c2i", Type.INT64).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c3s", Type.STRING).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c4i", Type.INT16).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c5s", Type.BINARY).nullable(true).build());
+    final Schema schema = new Schema(columns);
+
+    final PartialRow lower = schema.newPartialRow();
+    lower.addInt("c0i", 0);
+    lower.addString("c1s", "a");
+    lower.addLong("c2i", -10);
+    lower.addString("c3s", "A");
+    lower.addShort("c4i", (short)-100);
+
+    final PartialRow upper = schema.newPartialRow();
+    upper.addInt("c0i", 1);
+    upper.addString("c1s", "b");
+    upper.addLong("c2i", 10);
+    upper.addString("c3s", "B");
+    upper.addShort("c4i", (short)100);
+
+    final Operation.OperationsEncoder enc = new Operation.OperationsEncoder();
+    final RowOperationsPB encoded = enc.encodeLowerAndUpperBounds(
+        lower, upper, RangePartitionBound.INCLUSIVE_BOUND, RangePartitionBound.EXCLUSIVE_BOUND);
+
+    Operation.OperationsDecoder dec = new Operation.OperationsDecoder();
+    List<RangePartition> decoded = dec.decodeRangePartitions(encoded, schema);
+    assertEquals(1, decoded.size());
+    assertEquals(RangePartitionBound.INCLUSIVE_BOUND,
+        decoded.get(0).getLowerBoundType());
+    assertEquals(RangePartitionBound.EXCLUSIVE_BOUND,
+        decoded.get(0).getUpperBoundType());
+    final PartialRow lowerDecoded = decoded.get(0).getLowerBound();
+    final PartialRow upperDecoded = decoded.get(0).getUpperBound();
+
+    assertTrue(lowerDecoded.isSet("c0i"));
+    assertEquals(0, lowerDecoded.getInt("c0i"));
+    assertTrue(lowerDecoded.isSet("c1s"));
+    assertEquals("a", lowerDecoded.getString("c1s"));
+    assertTrue(lowerDecoded.isSet("c2i"));
+    assertEquals(-10, lowerDecoded.getLong("c2i"));
+    assertTrue(lowerDecoded.isSet("c3s"));
+    assertEquals("A", lowerDecoded.getString("c3s"));
+    assertTrue(lowerDecoded.isSet("c4i"));
+    assertEquals(-100, lowerDecoded.getShort("c4i"));
+    assertFalse(lowerDecoded.isSet("c5s"));
+    assertEquals(lower.toString(), lowerDecoded.toString());
+
+    assertTrue(upperDecoded.isSet("c0i"));
+    assertEquals(1, upperDecoded.getInt("c0i"));
+    assertTrue(upperDecoded.isSet("c1s"));
+    assertEquals("b", upperDecoded.getString("c1s"));
+    assertTrue(upperDecoded.isSet("c2i"));
+    assertEquals(10, upperDecoded.getLong("c2i"));
+    assertTrue(upperDecoded.isSet("c3s"));
+    assertEquals("B", upperDecoded.getString("c3s"));
+    assertTrue(upperDecoded.isSet("c4i"));
+    assertEquals(100, upperDecoded.getShort("c4i"));
+    assertFalse(upperDecoded.isSet("c5s"));
+    assertEquals(upper.toString(), upperDecoded.toString());
+  }
+
+  @Test
+  public void testEncodeDecodeMultipleRangePartitions() {
+    ArrayList<ColumnSchema> columns = new ArrayList<>(2);
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT64).build());
+    final Schema schema = new Schema(columns);
+
+    List<RangePartition> rangePartitions = new ArrayList<>();
+    {
+      final PartialRow lower = schema.newPartialRow();
+      lower.addInt("c0", 0);
+
+      final PartialRow upper = schema.newPartialRow();
+      upper.addInt("c0", 100);
+      rangePartitions.add(new RangePartition(
+          lower,
+          upper,
+          RangePartitionBound.INCLUSIVE_BOUND,
+          RangePartitionBound.EXCLUSIVE_BOUND));
+    }
+    {
+      final PartialRow lower = schema.newPartialRow();
+      lower.addInt("c0", 200);
+
+      final PartialRow upper = schema.newPartialRow();
+      upper.addInt("c0", 300);
+      rangePartitions.add(new RangePartition(
+          lower,
+          upper,
+          RangePartitionBound.EXCLUSIVE_BOUND,
+          RangePartitionBound.INCLUSIVE_BOUND));
+    }
+
+    final Operation.OperationsEncoder enc = new Operation.OperationsEncoder();
+    final RowOperationsPB encoded = enc.encodeRangePartitions(
+        rangePartitions, ImmutableList.of());
+
+    Operation.OperationsDecoder dec = new Operation.OperationsDecoder();
+    List<RangePartition> decoded = dec.decodeRangePartitions(encoded, schema);
+    assertEquals(2, decoded.size());
+
+    assertEquals(RangePartitionBound.INCLUSIVE_BOUND,
+        decoded.get(0).getLowerBoundType());
+    assertEquals(RangePartitionBound.EXCLUSIVE_BOUND,
+        decoded.get(0).getUpperBoundType());
+    {
+      final PartialRow lowerDecoded = decoded.get(0).getLowerBound();
+      final PartialRow upperDecoded = decoded.get(0).getUpperBound();
+
+      assertTrue(lowerDecoded.isSet("c0"));
+      assertEquals(0, lowerDecoded.getInt("c0"));
+      assertFalse(lowerDecoded.isSet("c1"));
+
+      assertTrue(upperDecoded.isSet("c0"));
+      assertEquals(100, upperDecoded.getInt("c0"));
+      assertFalse(upperDecoded.isSet("c1"));
+    }
+
+    assertEquals(RangePartitionBound.EXCLUSIVE_BOUND,
+        decoded.get(1).getLowerBoundType());
+    assertEquals(RangePartitionBound.INCLUSIVE_BOUND,
+        decoded.get(1).getUpperBoundType());
+    {
+      final PartialRow lowerDecoded = decoded.get(1).getLowerBound();
+      final PartialRow upperDecoded = decoded.get(1).getUpperBound();
+
+      assertTrue(lowerDecoded.isSet("c0"));
+      assertEquals(200, lowerDecoded.getInt("c0"));
+      assertFalse(lowerDecoded.isSet("c1"));
+
+      assertTrue(upperDecoded.isSet("c0"));
+      assertEquals(300, upperDecoded.getInt("c0"));
+      assertFalse(upperDecoded.isSet("c1"));
+    }
+  }
+
+  @Test
+  public void testEncodeDecodeMultipleRangePartitionsNullableColumns() {
+    ArrayList<ColumnSchema> columns = new ArrayList<>(2);
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", Type.INT32).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.STRING).key(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.INT64).nullable(true).build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c3", Type.STRING).nullable(true).build());
+    final Schema schema = new Schema(columns);
+
+    List<RangePartition> rangePartitions = new ArrayList<>();
+    {
+      final PartialRow lower = schema.newPartialRow();
+      lower.addInt("c0", 0);
+      lower.addString("c1", "a");
+
+      final PartialRow upper = schema.newPartialRow();
+      upper.addInt("c0", 100);
+      upper.addString("c1", "c");
+      rangePartitions.add(new RangePartition(
+          lower,
+          upper,
+          RangePartitionBound.INCLUSIVE_BOUND,
+          RangePartitionBound.EXCLUSIVE_BOUND));
+    }
+    {
+      final PartialRow lower = schema.newPartialRow();
+      lower.addInt("c0", 200);
+      lower.addString("c1", "e");
+
+      final PartialRow upper = schema.newPartialRow();
+      upper.addInt("c0", 300);
+      upper.addString("c1", "f");
+      rangePartitions.add(new RangePartition(
+          lower,
+          upper,
+          RangePartitionBound.EXCLUSIVE_BOUND,
+          RangePartitionBound.INCLUSIVE_BOUND));
+    }
+
+    final Operation.OperationsEncoder enc = new Operation.OperationsEncoder();
+    final RowOperationsPB encoded = enc.encodeRangePartitions(
+        rangePartitions, ImmutableList.of());
+
+    Operation.OperationsDecoder dec = new Operation.OperationsDecoder();
+    List<RangePartition> decoded = dec.decodeRangePartitions(encoded, schema);
+    assertEquals(2, decoded.size());
+
+    assertEquals(RangePartitionBound.INCLUSIVE_BOUND,
+        decoded.get(0).getLowerBoundType());
+ assertEquals(RangePartitionBound.EXCLUSIVE_BOUND, + decoded.get(0).getUpperBoundType()); + { + final PartialRow lowerDecoded = decoded.get(0).getLowerBound(); + final PartialRow upperDecoded = decoded.get(0).getUpperBound(); + + assertTrue(lowerDecoded.isSet("c0")); + assertEquals(0, lowerDecoded.getInt("c0")); + assertTrue(lowerDecoded.isSet("c1")); + assertEquals("a", lowerDecoded.getString("c1")); + assertFalse(lowerDecoded.isSet("c2")); + assertFalse(lowerDecoded.isSet("c3")); + + assertTrue(upperDecoded.isSet("c0")); + assertEquals(100, upperDecoded.getInt("c0")); + assertTrue(upperDecoded.isSet("c1")); + assertEquals("c", upperDecoded.getString("c1")); + assertFalse(upperDecoded.isSet("c2")); + assertFalse(upperDecoded.isSet("c3")); + } + + + assertEquals(RangePartitionBound.EXCLUSIVE_BOUND, + decoded.get(1).getLowerBoundType()); + assertEquals(RangePartitionBound.INCLUSIVE_BOUND, + decoded.get(1).getUpperBoundType()); + { + final PartialRow lowerDecoded = decoded.get(1).getLowerBound(); + final PartialRow upperDecoded = decoded.get(1).getUpperBound(); + + assertTrue(lowerDecoded.isSet("c0")); + assertEquals(200, lowerDecoded.getInt("c0")); + assertTrue(lowerDecoded.isSet("c1")); + assertEquals("e", lowerDecoded.getString("c1")); + assertFalse(lowerDecoded.isSet("c2")); + assertFalse(lowerDecoded.isSet("c3")); + + assertTrue(upperDecoded.isSet("c0")); + assertEquals(300, upperDecoded.getInt("c0")); + assertTrue(upperDecoded.isSet("c1")); + assertEquals("f", upperDecoded.getString("c1")); + assertFalse(upperDecoded.isSet("c2")); + assertFalse(upperDecoded.isSet("c3")); + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java new file mode 100644 index 0000000000..d64503b6e2 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java @@ -0,0 +1,633 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.getPartialRowWithAllTypes; +import static org.apache.kudu.test.ClientTestUtil.getSchemaWithAllTypes; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.sql.Date; +import java.sql.Timestamp; +import java.time.LocalDate; + +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.util.DateUtil; + +public class TestPartialRow { + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + public void testGetters() { + PartialRow partialRow = getPartialRowWithAllTypes(); + assertEquals(true, partialRow.getBoolean("bool")); + assertEquals(42, partialRow.getByte("int8")); + assertEquals(43, partialRow.getShort("int16")); + assertEquals(44, partialRow.getInt("int32")); + assertEquals(45, partialRow.getLong("int64")); + assertEquals(new Timestamp(1234567890), partialRow.getTimestamp("timestamp")); + assertEquals(Date.valueOf(LocalDate.ofEpochDay(0)), partialRow.getDate("date")); + assertEquals(52.35F, partialRow.getFloat("float"), 0.0f); + assertEquals(53.35, partialRow.getDouble("double"), 0.0); + assertEquals("fun with ütf\0", partialRow.getString("string")); + assertArrayEquals(new byte[] { 0, 1, 2, 3, 4 }, + partialRow.getBinaryCopy("binary-array")); + assertArrayEquals(new byte[] { 5, 6, 7, 8, 9 }, + partialRow.getBinaryCopy("binary-bytebuffer")); + assertEquals(ByteBuffer.wrap(new byte[] { 0, 1, 2, 3, 4 }), + partialRow.getBinary("binary-array")); + assertEquals(ByteBuffer.wrap(new byte[] { 5, 6, 7, 8, 9 }), + partialRow.getBinary("binary-bytebuffer")); + assertTrue(partialRow.isSet("null")); + assertTrue(partialRow.isNull("null")); + assertEquals(BigDecimal.valueOf(12345, 3), + partialRow.getDecimal("decimal")); + } + + @Test + public void testGetObject() { + PartialRow partialRow = getPartialRowWithAllTypes(); + assertTrue(partialRow.getObject("bool") instanceof Boolean); + assertEquals(true, partialRow.getObject("bool")); + assertTrue(partialRow.getObject("int8") instanceof Byte); + assertEquals((byte) 42, partialRow.getObject("int8")); + assertTrue(partialRow.getObject("int16") instanceof Short); + assertEquals((short)43, partialRow.getObject("int16")); + assertTrue(partialRow.getObject("int32") instanceof Integer); + assertEquals(44, partialRow.getObject("int32")); + assertTrue(partialRow.getObject("int64") instanceof Long); + assertEquals((long) 45, partialRow.getObject("int64")); + assertTrue(partialRow.getObject("timestamp") instanceof Timestamp); + assertEquals(new Timestamp(1234567890), partialRow.getObject("timestamp")); + assertTrue(partialRow.getObject("date") instanceof Date); + assertEquals(Date.valueOf(LocalDate.ofEpochDay(0)), partialRow.getObject("date")); + assertTrue(partialRow.getObject("float") instanceof Float); + assertEquals(52.35F, (float) partialRow.getObject("float"), 0.0f); + assertTrue(partialRow.getObject("double") instanceof Double); + assertEquals(53.35, (double) partialRow.getObject("double"), 0.0); + assertTrue(partialRow.getObject("string") instanceof String); + assertEquals("fun with ütf\0", 
partialRow.getObject("string")); + assertTrue(partialRow.getObject("varchar") instanceof String); + assertEquals("árvíztűrő ", partialRow.getObject("varchar")); + assertTrue(partialRow.getObject("binary-array") instanceof byte[]); + assertArrayEquals(new byte[] { 0, 1, 2, 3, 4 }, + partialRow.getBinaryCopy("binary-array")); + assertTrue(partialRow.getObject("binary-bytebuffer") instanceof byte[]); + assertEquals(ByteBuffer.wrap(new byte[] { 5, 6, 7, 8, 9 }), + partialRow.getBinary("binary-bytebuffer")); + assertNull(partialRow.getObject("null")); + assertTrue(partialRow.getObject("decimal") instanceof BigDecimal); + assertEquals(BigDecimal.valueOf(12345, 3), + partialRow.getObject("decimal")); + } + + @Test + public void testAddObject() { + Schema schema = getSchemaWithAllTypes(); + // Ensure we aren't missing any types + assertEquals(15, schema.getColumnCount()); + + PartialRow row = schema.newPartialRow(); + row.addObject("int8", (byte) 42); + row.addObject("int16", (short) 43); + row.addObject("int32", 44); + row.addObject("int64", 45L); + row.addObject("timestamp", new Timestamp(1234567890)); + row.addObject("date", Date.valueOf(LocalDate.ofEpochDay(0))); + row.addObject("bool", true); + row.addObject("float", 52.35F); + row.addObject("double", 53.35); + row.addObject("string", "fun with ütf\0"); + row.addObject("varchar", "árvíztűrő tükörfúrógép"); + row.addObject("binary-array", new byte[] { 0, 1, 2, 3, 4 }); + ByteBuffer binaryBuffer = ByteBuffer.wrap(new byte[] { 5, 6, 7, 8, 9 }); + row.addObject("binary-bytebuffer", binaryBuffer); + row.addObject("null", null); + row.addObject("decimal", BigDecimal.valueOf(12345, 3)); + + PartialRow expected = getPartialRowWithAllTypes(); + for (ColumnSchema col : schema.getColumns()) { + assertEquals(callGetByName(expected, col.getName(), col.getType()), + callGetByName(row, col.getName(), col.getType())); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testGetNullColumn() { + PartialRow partialRow = getPartialRowWithAllTypes(); + assertTrue(partialRow.isSet("null")); + assertTrue(partialRow.isNull("null")); + partialRow.getString("null"); + } + + @Test(expected = IllegalArgumentException.class) + public void testSetNonNullableColumn() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.setNull("int32"); + } + + @Test + public void testGetUnsetColumn() { + Schema schema = getSchemaWithAllTypes(); + PartialRow partialRow = schema.newPartialRow(); + for (ColumnSchema columnSchema : schema.getColumns()) { + assertFalse(partialRow.isSet("null")); + assertFalse(partialRow.isNull("null")); + try { + callGetByName(partialRow, columnSchema.getName(), columnSchema.getType()); + fail("Expected IllegalArgumentException for type: " + columnSchema.getType()); + } catch (IllegalArgumentException ex) { + // This is the expected exception. + } + } + } + + @Test + public void testGetMissingColumnName() { + PartialRow partialRow = getPartialRowWithAllTypes(); + for (ColumnSchema columnSchema : partialRow.getSchema().getColumns()) { + try { + callGetByName(partialRow, "not-a-column", columnSchema.getType()); + fail("Expected IllegalArgumentException for type: " + columnSchema.getType()); + } catch (IllegalArgumentException ex) { + // This is the expected exception. 
+ } + } + } + + @Test + public void testGetMissingColumnIndex() { + PartialRow partialRow = getPartialRowWithAllTypes(); + for (ColumnSchema columnSchema : partialRow.getSchema().getColumns()) { + try { + callGetByIndex(partialRow, 999, columnSchema.getType()); + fail("Expected IndexOutOfBoundsException for type: " + columnSchema.getType()); + } catch (IndexOutOfBoundsException ex) { + // This is the expected exception. + } + } + } + + @Test + public void testGetWrongTypeColumn() { + PartialRow partialRow = getPartialRowWithAllTypes(); + for (ColumnSchema columnSchema : partialRow.getSchema().getColumns()) { + try { + // Skip the null column because `isNull` is not type specific. + if ("null".equals(columnSchema.getName())) { + continue; + } + callGetByName(partialRow, columnSchema.getName(), getShiftedType(columnSchema.getType())); + fail("Expected IllegalArgumentException for type: " + columnSchema.getType()); + } catch (IllegalArgumentException ex) { + // This is the expected exception. + } + } + } + + @Test + public void testAddMissingColumnName() { + PartialRow partialRow = getPartialRowWithAllTypes(); + for (ColumnSchema columnSchema : partialRow.getSchema().getColumns()) { + try { + callAddByName(partialRow, "not-a-column", columnSchema.getType()); + fail("Expected IllegalArgumentException for type: " + columnSchema.getType()); + } catch (IllegalArgumentException ex) { + // This is the expected exception. + } + } + } + + @Test + public void testAddMissingColumnIndex() { + PartialRow partialRow = getPartialRowWithAllTypes(); + for (ColumnSchema columnSchema : partialRow.getSchema().getColumns()) { + try { + callAddByIndex(partialRow, 999, columnSchema.getType()); + fail("Expected IndexOutOfBoundsException for type: " + columnSchema.getType()); + } catch (IndexOutOfBoundsException ex) { + // This is the expected exception. + } + } + } + + @Test + public void testAddWrongTypeColumn() { + PartialRow partialRow = getPartialRowWithAllTypes(); + for (ColumnSchema columnSchema : partialRow.getSchema().getColumns()) { + try { + callAddByName(partialRow, columnSchema.getName(), getShiftedType(columnSchema.getType())); + fail("Expected IllegalArgumentException for type: " + columnSchema.getType()); + } catch (IllegalArgumentException ex) { + // This is the expected exception. + } + } + } + + @Test + public void testAddToFrozenRow() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.freeze(); + for (ColumnSchema columnSchema : partialRow.getSchema().getColumns()) { + try { + callAddByName(partialRow, columnSchema.getName(), columnSchema.getType()); + fail("Expected IllegalStateException for type: " + columnSchema.getType()); + } catch (IllegalStateException ex) { + // This is the expected exception. 
+ } + } + } + + @Test(expected = IllegalArgumentException.class) + public void testIsNullMissingColumnName() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.isNull("not-a-column"); + } + + @Test(expected = IndexOutOfBoundsException.class) + public void testIsNullMissingColumnIndex() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.isNull(999); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsSetMissingColumnName() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.isSet("not-a-column"); + } + + @Test(expected = IndexOutOfBoundsException.class) + public void testIsSetMissingColumnIndex() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.isSet(999); + } + + @Test(expected = IllegalArgumentException.class) + public void testAddInvalidPrecisionDecimal() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.addDecimal("decimal", BigDecimal.valueOf(123456, 3)); + } + + @Test(expected = IllegalArgumentException.class) + public void testAddInvalidScaleDecimal() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.addDecimal("decimal", BigDecimal.valueOf(12345, 4)); + } + + @Test(expected = IllegalArgumentException.class) + public void testAddInvalidCoercedScaleDecimal() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.addDecimal("decimal", BigDecimal.valueOf(12345, 2)); + } + + @Test + public void testAddCoercedScaleAndPrecisionDecimal() { + PartialRow partialRow = getPartialRowWithAllTypes(); + partialRow.addDecimal("decimal", BigDecimal.valueOf(222, 1)); + BigDecimal decimal = partialRow.getDecimal("decimal"); + assertEquals("22.200", decimal.toString()); + } + + @Test(expected = IllegalArgumentException.class) + public void testAddDateOutOfRange() { + PartialRow partialRow = getPartialRowWithAllTypes(); + Date d = Date.valueOf(LocalDate.of(10000, 1, 1)); + partialRow.addDate("date", d); + } + + @Test + public void testToString() { + Schema schema = getSchemaWithAllTypes(); + + PartialRow row = schema.newPartialRow(); + assertEquals("()", row.toString()); + + row.addInt("int32", 42); + row.addByte("int8", (byte) 42); + + assertEquals("(int8 int8=42, int32 int32=42)", row.toString()); + + row.addString("string", "fun with ütf\0"); + assertEquals("(int8 int8=42, int32 int32=42, string string=\"fun with ütf\\0\")", + row.toString()); + + ByteBuffer binary = ByteBuffer.wrap(new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + binary.position(2); + binary.limit(5); + + row.addBinary("binary-bytebuffer", binary); + assertEquals("(int8 int8=42, int32 int32=42, string string=\"fun with ütf\\0\", " + + "binary binary-bytebuffer=[2, 3, 4])", + row.toString()); + + row.addDouble("double", 52.35); + assertEquals("(int8 int8=42, int32 int32=42, double double=52.35, " + + "string string=\"fun with ütf\\0\", binary binary-bytebuffer=[2, 3, 4])", + row.toString()); + + row.addDecimal("decimal", BigDecimal.valueOf(12345, 3)); + assertEquals("(int8 int8=42, int32 int32=42, double double=52.35, " + + "string string=\"fun with ütf\\0\", binary binary-bytebuffer=[2, 3, 4], " + + "decimal(5, 3) decimal=12.345)", + row.toString()); + + row.addVarchar("varchar", "árvíztűrő tükörfúrógép"); + assertEquals("(int8 int8=42, int32 int32=42, double double=52.35, " + + "string string=\"fun with ütf\\0\", binary binary-bytebuffer=[2, 3, 4], " + + "decimal(5, 3) decimal=12.345, varchar(10) varchar=\"árvíztűrő \")", + row.toString()); + + PartialRow row2 = 
schema.newPartialRow(); + assertEquals("()", row2.toString()); + row2.addDate("date", Date.valueOf(LocalDate.ofEpochDay(0))); + assertEquals("(date date=1970-01-01)", row2.toString()); + } + + @Test + public void testIncrementColumn() { + PartialRow partialRow = getPartialRowWithAllTypes(); + + // Boolean + int boolIndex = getColumnIndex(partialRow, "bool"); + partialRow.addBoolean(boolIndex, false); + assertTrue(partialRow.incrementColumn(boolIndex)); + assertEquals(true, partialRow.getBoolean(boolIndex)); + assertFalse(partialRow.incrementColumn(boolIndex)); + + // Int8 + int int8Index = getColumnIndex(partialRow, "int8"); + partialRow.addByte(int8Index, (byte)(Byte.MAX_VALUE - 1)); + assertTrue(partialRow.incrementColumn(int8Index)); + assertEquals(Byte.MAX_VALUE, partialRow.getByte(int8Index)); + assertFalse(partialRow.incrementColumn(int8Index)); + + // Int16 + int int16Index = getColumnIndex(partialRow, "int16"); + partialRow.addShort(int16Index, (short)(Short.MAX_VALUE - 1)); + assertTrue(partialRow.incrementColumn(int16Index)); + assertEquals(Short.MAX_VALUE, partialRow.getShort(int16Index)); + assertFalse(partialRow.incrementColumn(int16Index)); + + // Int32 + int int32Index = getColumnIndex(partialRow, "int32"); + partialRow.addInt(int32Index, Integer.MAX_VALUE - 1); + assertTrue(partialRow.incrementColumn(int32Index)); + assertEquals(Integer.MAX_VALUE, partialRow.getInt(int32Index)); + assertFalse(partialRow.incrementColumn(int32Index)); + + // Int64 + int int64Index = getColumnIndex(partialRow, "int64"); + partialRow.addLong(int64Index, Long.MAX_VALUE - 1); + assertTrue(partialRow.incrementColumn(int64Index)); + assertEquals(Long.MAX_VALUE, partialRow.getLong(int64Index)); + assertFalse(partialRow.incrementColumn(int64Index)); + + // Float + int floatIndex = getColumnIndex(partialRow, "float"); + partialRow.addFloat(floatIndex, Float.MAX_VALUE); + assertTrue(partialRow.incrementColumn(floatIndex)); + assertEquals(Float.POSITIVE_INFINITY, partialRow.getFloat(floatIndex), 0.0f); + assertFalse(partialRow.incrementColumn(floatIndex)); + + // Float + int doubleIndex = getColumnIndex(partialRow, "double"); + partialRow.addDouble(doubleIndex, Double.MAX_VALUE); + assertTrue(partialRow.incrementColumn(doubleIndex)); + assertEquals(Double.POSITIVE_INFINITY, partialRow.getDouble(doubleIndex), 0.0); + assertFalse(partialRow.incrementColumn(doubleIndex)); + + // Decimal + int decimalIndex = getColumnIndex(partialRow, "decimal"); + // Decimal with precision 5, scale 3 has a max of 99.999 + partialRow.addDecimal(decimalIndex, new BigDecimal("99.998")); + assertTrue(partialRow.incrementColumn(decimalIndex)); + assertEquals(new BigDecimal("99.999"), partialRow.getDecimal(decimalIndex)); + assertFalse(partialRow.incrementColumn(decimalIndex)); + + // String + int stringIndex = getColumnIndex(partialRow, "string"); + partialRow.addString(stringIndex, "hello"); + assertTrue(partialRow.incrementColumn(stringIndex)); + assertEquals("hello\0", partialRow.getString(stringIndex)); + + // Binary + int binaryIndex = getColumnIndex(partialRow, "binary-array"); + partialRow.addBinary(binaryIndex, new byte[] { 0, 1, 2, 3, 4 }); + assertTrue(partialRow.incrementColumn(binaryIndex)); + assertArrayEquals(new byte[] { 0, 1, 2, 3, 4, 0 }, partialRow.getBinaryCopy(binaryIndex)); + + // Varchar + int varcharIndex = getColumnIndex(partialRow, "varchar"); + partialRow.addVarchar(varcharIndex, "hello"); + assertTrue(partialRow.incrementColumn(varcharIndex)); + assertEquals("hello\0", 
partialRow.getVarchar(varcharIndex)); + + // Date + int dateIndex = getColumnIndex(partialRow, "date"); + partialRow.addDate(dateIndex, DateUtil.epochDaysToSqlDate(DateUtil.MAX_DATE_VALUE - 1)); + assertTrue(partialRow.incrementColumn(dateIndex)); + Date maxDate = DateUtil.epochDaysToSqlDate(DateUtil.MAX_DATE_VALUE); + assertEquals(maxDate, partialRow.getDate(dateIndex)); + assertFalse(partialRow.incrementColumn(dateIndex)); + } + + @Test + public void testSetMin() { + PartialRow partialRow = getPartialRowWithAllTypes(); + for (int i = 0; i < partialRow.getSchema().getColumnCount(); i++) { + partialRow.setMin(i); + } + assertEquals(false, partialRow.getBoolean("bool")); + assertEquals(Byte.MIN_VALUE, partialRow.getByte("int8")); + assertEquals(Short.MIN_VALUE, partialRow.getShort("int16")); + assertEquals(Integer.MIN_VALUE, partialRow.getInt("int32")); + assertEquals(Long.MIN_VALUE, partialRow.getLong("int64")); + assertEquals(Long.MIN_VALUE, partialRow.getLong("timestamp")); + assertEquals(DateUtil.epochDaysToSqlDate(DateUtil.MIN_DATE_VALUE), partialRow.getDate("date")); + assertEquals(-Float.MAX_VALUE, partialRow.getFloat("float"), 0.0f); + assertEquals(-Double.MAX_VALUE, partialRow.getDouble("double"), 0.0); + assertEquals("", partialRow.getString("string")); + assertEquals("", partialRow.getVarchar("varchar")); + assertArrayEquals(new byte[0], partialRow.getBinaryCopy("binary-array")); + assertArrayEquals(new byte[0], partialRow.getBinaryCopy("binary-bytebuffer")); + assertEquals(BigDecimal.valueOf(-99999, 3), partialRow.getDecimal("decimal")); + } + + private int getColumnIndex(PartialRow partialRow, String columnName) { + return partialRow.getSchema().getColumnIndex(columnName); + } + + // Shift the type one position to force the wrong type for all types. 
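+  // For example, a column's value is read and written via the accessor of the
+  // next Type enum constant, wrapping around from the last constant to the first.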
+ private Type getShiftedType(Type type) { + int shiftedPosition = (type.ordinal() + 1) % Type.values().length; + return Type.values()[shiftedPosition]; + } + + private Object callGetByName(PartialRow partialRow, String columnName, Type type) { + if (partialRow.isNull(columnName)) { + return null; + } + switch (type) { + case INT8: return partialRow.getByte(columnName); + case INT16: return partialRow.getShort(columnName); + case INT32: return partialRow.getInt(columnName); + case INT64: return partialRow.getLong(columnName); + case DATE: return partialRow.getDate(columnName); + case UNIXTIME_MICROS: return partialRow.getTimestamp(columnName); + case VARCHAR: return partialRow.getVarchar(columnName); + case STRING: return partialRow.getString(columnName); + case BINARY: return partialRow.getBinary(columnName); + case FLOAT: return partialRow.getFloat(columnName); + case DOUBLE: return partialRow.getDouble(columnName); + case BOOL: return partialRow.getBoolean(columnName); + case DECIMAL: return partialRow.getDecimal(columnName); + default: + throw new UnsupportedOperationException(); + } + } + + private Object callGetByIndex(PartialRow partialRow, int columnIndex, Type type) { + if (partialRow.isNull(columnIndex)) { + return null; + } + switch (type) { + case INT8: return partialRow.getByte(columnIndex); + case INT16: return partialRow.getShort(columnIndex); + case INT32: return partialRow.getInt(columnIndex); + case INT64: return partialRow.getLong(columnIndex); + case DATE: return partialRow.getDate(columnIndex); + case UNIXTIME_MICROS: return partialRow.getTimestamp(columnIndex); + case VARCHAR: return partialRow.getVarchar(columnIndex); + case STRING: return partialRow.getString(columnIndex); + case BINARY: return partialRow.getBinary(columnIndex); + case FLOAT: return partialRow.getFloat(columnIndex); + case DOUBLE: return partialRow.getDouble(columnIndex); + case BOOL: return partialRow.getBoolean(columnIndex); + case DECIMAL: return partialRow.getDecimal(columnIndex); + default: + throw new UnsupportedOperationException(); + } + } + + private void callAddByName(PartialRow partialRow, String columnName, Type type) { + switch (type) { + case INT8: + partialRow.addByte(columnName, (byte) 42); + break; + case INT16: + partialRow.addShort(columnName, (short) 43); + break; + case INT32: + partialRow.addInt(columnName, 44); + break; + case INT64: + partialRow.addLong(columnName, 45); + break; + case UNIXTIME_MICROS: + partialRow.addTimestamp(columnName, new Timestamp(1234567890)); + break; + case VARCHAR: + partialRow.addVarchar(columnName, "fun with ütf\0"); + break; + case STRING: + partialRow.addString(columnName, "fun with ütf\0"); + break; + case BINARY: + partialRow.addBinary(columnName, new byte[] { 0, 1, 2, 3, 4 }); + break; + case FLOAT: + partialRow.addFloat(columnName, 52.35F); + break; + case DOUBLE: + partialRow.addDouble(columnName, 53.35); + break; + case BOOL: + partialRow.addBoolean(columnName, true); + break; + case DECIMAL: + partialRow.addDecimal(columnName, BigDecimal.valueOf(12345, 3)); + break; + case DATE: + partialRow.addDate(columnName, DateUtil.epochDaysToSqlDate(0)); + break; + default: + throw new UnsupportedOperationException(); + } + } + + private void callAddByIndex(PartialRow partialRow, int columnIndex, Type type) { + switch (type) { + case INT8: + partialRow.addByte(columnIndex, (byte) 42); + break; + case INT16: + partialRow.addShort(columnIndex, (short) 43); + break; + case INT32: + partialRow.addInt(columnIndex, 44); + break; + case INT64: + 
partialRow.addLong(columnIndex, 45); + break; + case UNIXTIME_MICROS: + partialRow.addTimestamp(columnIndex, new Timestamp(1234567890)); + break; + case VARCHAR: + partialRow.addVarchar(columnIndex, "fun with ütf\0"); + break; + case STRING: + partialRow.addString(columnIndex, "fun with ütf\0"); + break; + case BINARY: + partialRow.addBinary(columnIndex, new byte[] { 0, 1, 2, 3, 4 }); + break; + case FLOAT: + partialRow.addFloat(columnIndex, 52.35F); + break; + case DOUBLE: + partialRow.addDouble(columnIndex, 53.35); + break; + case BOOL: + partialRow.addBoolean(columnIndex, true); + break; + case DECIMAL: + partialRow.addDecimal(columnIndex, BigDecimal.valueOf(12345, 3)); + break; + case DATE: + partialRow.addDate(columnIndex, DateUtil.epochDaysToSqlDate(0)); + break; + default: + throw new UnsupportedOperationException(); + } + } + +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestPartitionPruner.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestPartitionPruner.java new file mode 100644 index 0000000000..962bef2b0c --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestPartitionPruner.java @@ -0,0 +1,1377 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import com.google.common.collect.ImmutableList; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.KuduPredicate.ComparisonOp; +import org.apache.kudu.test.KuduTestHarness; + +public class TestPartitionPruner { + public static final Logger LOG = LoggerFactory.getLogger(TestPartitionPruner.class); + + private KuduClient client; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + } + + /** + * This method is an obsolete implementation of PartitionPruner.java#pruneHashComponent. + * The implementation is inefficient and may cause OOM. + * + * Search all combinations of in-list and equality predicates for pruneable hash partitions. + * This method is used just for test to check new algorithm's correctness. + * @deprecated we can remove it later, now just for test. 
+ * @return a bitset containing {@code false} bits for hash buckets which may be pruned + */ + private static BitSet pruneHashComponent(Schema schema, + PartitionSchema.HashBucketSchema hashSchema, + Map<String, KuduPredicate> predicates) { + BitSet hashBuckets = new BitSet(hashSchema.getNumBuckets()); + List<Integer> columnIdxs = + PartitionPruner.idsToIndexesForTest(schema, hashSchema.getColumnIds()); + for (int idx : columnIdxs) { + ColumnSchema column = schema.getColumnByIndex(idx); + KuduPredicate predicate = predicates.get(column.getName()); + if (predicate == null || + (predicate.getType() != KuduPredicate.PredicateType.EQUALITY && + predicate.getType() != KuduPredicate.PredicateType.IN_LIST)) { + hashBuckets.set(0, hashSchema.getNumBuckets()); + return hashBuckets; + } + } + + List<PartialRow> rows = Arrays.asList(schema.newPartialRow()); + for (int idx : columnIdxs) { + List<PartialRow> newRows = new ArrayList<>(); + ColumnSchema column = schema.getColumnByIndex(idx); + KuduPredicate predicate = predicates.get(column.getName()); + List<byte[]> predicateValues; + if (predicate.getType() == KuduPredicate.PredicateType.EQUALITY) { + predicateValues = Collections.singletonList(predicate.getLower()); + } else { + predicateValues = Arrays.asList(predicate.getInListValues()); + } + // Duplicate every partial row built so far, once per value of the + // equality or in-list predicate. + for (PartialRow row : rows) { + for (byte[] predicateValue : predicateValues) { + PartialRow newRow = new PartialRow(row); + newRow.setRaw(idx, predicateValue); + newRows.add(newRow); + } + } + rows = newRows; + } + for (PartialRow row : rows) { + int hash = KeyEncoder.getHashBucket(row, hashSchema); + hashBuckets.set(hash); + } + return hashBuckets; + } + + static class ReturnValueHelper { + private Schema schema; + private PartitionSchema partitionSchema; + private Map<String, KuduPredicate> predicates; + + public ReturnValueHelper(Schema schema, PartitionSchema partitionSchema, + Map<String, KuduPredicate> predicates) { + this.schema = schema; + this.partitionSchema = partitionSchema; + this.predicates = predicates; + } + } + + // Prepares test cases with large in-list predicates for the unit tests below. 
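+ // The method creates one wide table with several multi-column hash partitions and then builds + // each case by attaching a random in-list predicate to every primary key column.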
+ public List<ReturnValueHelper> prepareForLargeInListPredicates(KuduClient client, + String tablePrefix, int totalCount, int inListMaxLength) throws KuduException { + final int columnSize = 200; + String keyNamePrefix = "key"; + final int keyColumnNumber = 10; + List<ColumnSchema> columnSchemas = new ArrayList<>(); + List<String> keyColumnNames = new ArrayList<>(); + List<ColumnSchema> keyColumnSchemas = new ArrayList<>(); + for (int i = 0; i < columnSize; i++) { + boolean isKey = false; + String columnName = keyNamePrefix + i; + if (i < keyColumnNumber) { + isKey = true; + } + ColumnSchema columnSchema = new ColumnSchema.ColumnSchemaBuilder(columnName, Type.INT32) + .key(isKey).build(); + if (isKey) { + keyColumnNames.add(columnName); + keyColumnSchemas.add(columnSchema); + } + columnSchemas.add(columnSchema); + } + + final Schema schema = new Schema(columnSchemas); + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(new ArrayList<>()); + tableBuilder.addHashPartitions(ImmutableList.of(keyColumnNames.get(0)), 2); + tableBuilder.addHashPartitions(ImmutableList.of( + keyColumnNames.get(1), keyColumnNames.get(2)), 2); + tableBuilder.addHashPartitions( + ImmutableList.of(keyColumnNames.get(3), keyColumnNames.get(4)), 3); + tableBuilder.addHashPartitions( + ImmutableList.of( + keyColumnNames.get(5), keyColumnNames.get(6), keyColumnNames.get(7), + keyColumnNames.get(8), keyColumnNames.get(9)), + 2); + + String tableName = tablePrefix + "-" + System.currentTimeMillis(); + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + + final int keyColumnCount = schema.getPrimaryKeyColumnCount(); + assertEquals(keyColumnNumber, keyColumnCount); + List<ReturnValueHelper> helpList = new ArrayList<>(); + for (int index = 1; index <= totalCount; index++) { + List<List<Integer>> testCases = new ArrayList<>(); + Random r = new Random(System.currentTimeMillis()); + for (int i = 0; i < keyColumnCount; i++) { + int inListLength = 1 + r.nextInt(inListMaxLength); + List<Integer> testCase = new ArrayList<>(); + for (int j = 0; j < inListLength; j++) { + testCase.add(r.nextInt()); + } + testCases.add(testCase); + } + + KuduScanner.KuduScannerBuilder scanBuilder = client.newScannerBuilder(table); + Schema scanSchema = scanBuilder.table.getSchema(); + PartitionSchema partitionSchema = scanBuilder.table.getPartitionSchema(); + for (int i = 0; i < keyColumnCount; i++) { + KuduPredicate pred = KuduPredicate.newInListPredicate(keyColumnSchemas.get(i), + ImmutableList.copyOf(testCases.get(i))); + scanBuilder.addPredicate(pred); + } + helpList.add(new ReturnValueHelper(scanSchema, partitionSchema, scanBuilder.predicates)); + } + return helpList; + } + + /** + * Counts the partitions touched by a scan with optional primary key bounds. + * The table is assumed to have three INT8 columns as the primary key. 
+ * + * @param expectedTablets the expected number of tablets to satisfy the scan + * @param table the table to scan + * @param partitions the partitions of the table + * @param lowerBoundPrimaryKey the optional lower bound primary key + * @param upperBoundPrimaryKey the optional upper bound primary key + */ + private void checkPartitionsPrimaryKey(int expectedTablets, + KuduTable table, + List partitions, + byte[] lowerBoundPrimaryKey, + byte[] upperBoundPrimaryKey) throws Exception { + KuduScanToken.KuduScanTokenBuilder scanBuilder = client.newScanTokenBuilder(table); + + if (lowerBoundPrimaryKey != null) { + PartialRow lower = table.getSchema().newPartialRow(); + for (int i = 0; i < 3; i++) { + lower.addByte(i, lowerBoundPrimaryKey[i]); + } + scanBuilder.lowerBound(lower); + } + + if (upperBoundPrimaryKey != null) { + PartialRow upper = table.getSchema().newPartialRow(); + for (int i = 0; i < 3; i++) { + upper.addByte(i, upperBoundPrimaryKey[i]); + } + scanBuilder.exclusiveUpperBound(upper); + } + + PartitionPruner pruner = PartitionPruner.create(scanBuilder); + + int scannedPartitions = 0; + for (Partition partition : partitions) { + if (!pruner.shouldPruneForTests(partition)) { + scannedPartitions++; + } + } + + // Check that the number of ScanTokens built for the scan matches. + assertEquals(expectedTablets, scannedPartitions); + assertEquals(scannedPartitions, scanBuilder.build().size()); + assertEquals(expectedTablets == 0 ? 0 : 1, pruner.numRangesRemainingForTests()); + } + + /** + * Checks the number of tablets and pruner ranges generated for a scan. + * + * @param expectedTablets the expected number of tablets to satisfy the scan + * @param expectedPrunerRanges the expected number of generated partition pruner ranges + * @param table the table to scan + * @param partitions the partitions of the table + * @param predicates the predicates to apply to the scan + */ + private void checkPartitions(int expectedTablets, + int expectedPrunerRanges, + KuduTable table, + List partitions, + KuduPredicate... predicates) { + checkPartitions(expectedTablets, + expectedPrunerRanges, + table, + partitions, + null, + null, + predicates); + } + + /** + * Checks the number of tablets and pruner ranges generated for a scan with + * predicates and optional partition key bounds. + * + * @param expectedTablets the expected number of tablets to satisfy the scan + * @param expectedPrunerRanges the expected number of generated partition pruner ranges + * @param table the table to scan + * @param partitions the partitions of the table + * @param lowerBoundPartitionKey an optional lower bound partition key + * @param upperBoundPartitionKey an optional upper bound partition key + * @param predicates the predicates to apply to the scan + */ + private void checkPartitions(int expectedTablets, + int expectedPrunerRanges, + KuduTable table, + List partitions, + byte[] lowerBoundPartitionKey, + byte[] upperBoundPartitionKey, + KuduPredicate... predicates) { + // Partition key bounds can't be applied to the ScanTokenBuilder. 
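+ // A KuduScannerBuilder is used here instead; the ScanTokenBuilder cross-check further below + // only runs when no partition key bounds are supplied.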
+ KuduScanner.KuduScannerBuilder scanBuilder = client.newScannerBuilder(table); + + for (KuduPredicate predicate : predicates) { + scanBuilder.addPredicate(predicate); + } + + if (lowerBoundPartitionKey != null) { + scanBuilder.lowerBoundPartitionKeyRaw(lowerBoundPartitionKey); + } + if (upperBoundPartitionKey != null) { + scanBuilder.exclusiveUpperBoundPartitionKeyRaw(upperBoundPartitionKey); + } + + PartitionPruner pruner = PartitionPruner.create(scanBuilder); + + int scannedPartitions = 0; + for (Partition partition : partitions) { + if (!pruner.shouldPruneForTests(partition)) { + scannedPartitions++; + } + } + + assertEquals(expectedTablets, scannedPartitions); + assertEquals(expectedPrunerRanges, pruner.numRangesRemainingForTests()); + + // Check that the scan token builder comes up with the same amount. + // The scan token builder does not allow for upper/lower partition keys. + if (lowerBoundPartitionKey == null && upperBoundPartitionKey == null) { + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + + for (KuduPredicate predicate : predicates) { + tokenBuilder.addPredicate(predicate); + } + + // Check that the number of ScanTokens built for the scan matches. + assertEquals(expectedTablets, tokenBuilder.build().size()); + } + } + + /** + * Retrieves the partitions of a table. + * + * @param table the table + * @return the partitions of the table + */ + private List getTablePartitions(KuduTable table) { + List partitions = new ArrayList<>(); + for (KuduScanToken token : client.newScanTokenBuilder(table).build()) { + partitions.add(token.getTablet().getPartition()); + } + return partitions; + } + + @Test + public void testPrimaryKeyRangePruning() throws Exception { + // CREATE TABLE t + // (a INT8, b INT8, c INT8) + // PRIMARY KEY (a, b, c)) + // PARTITION BY RANGE (a, b, c) + // (PARTITION VALUES < (0, 0, 0), + // PARTITION (0, 0, 0) <= VALUES < (10, 10, 10) + // PARTITION (10, 10, 10) <= VALUES); + + ArrayList columns = new ArrayList<>(3); + columns.add(new ColumnSchema.ColumnSchemaBuilder("a", Type.INT8).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("b", Type.INT8).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c", Type.INT8).key(true).build()); + Schema schema = new Schema(columns); + + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(ImmutableList.of("a", "b", "c")); + + PartialRow split = schema.newPartialRow(); + split.addByte("a", (byte) 0); + split.addByte("b", (byte) 0); + split.addByte("c", (byte) 0); + tableBuilder.addSplitRow(split); + split.addByte("a", (byte) 10); + split.addByte("b", (byte) 10); + split.addByte("c", (byte) 10); + tableBuilder.addSplitRow(split); + + String tableName = "testPrimaryKeyRangePruning-" + System.currentTimeMillis(); + + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + List partitions = getTablePartitions(table); + + byte min = Byte.MIN_VALUE; + + // No bounds + checkPartitionsPrimaryKey(3, table, partitions, + null, null); + + // PK < (-1, min, min) + checkPartitionsPrimaryKey(1, table, partitions, + null, new byte[] { -1, min, min }); + + // PK < (0, 0, 0) + checkPartitionsPrimaryKey(1, table, partitions, + null, new byte[] { 0, 0, 0 }); + + // PK < (0, 0, min) + checkPartitionsPrimaryKey(1, table, partitions, + null, new byte[] { 0, 0, min }); + + // PK < (10, 10, 10) + checkPartitionsPrimaryKey(2, table, partitions, + null, new byte[] { 10, 10, 10 
}); + + // PK < (100, min, min) + checkPartitionsPrimaryKey(3, table, partitions, + null, new byte[] { 100, min, min }); + + // PK >= (-10, -10, -10) + checkPartitionsPrimaryKey(3, table, partitions, + new byte[] { -10, -10, -10 }, null); + + // PK >= (0, 0, 0) + checkPartitionsPrimaryKey(2, table, partitions, + new byte[] { 0, 0, 0 }, null); + + // PK >= (100, 0, 0) + checkPartitionsPrimaryKey(1, table, partitions, + new byte[] { 100, 0, 0 }, null); + + // PK >= (-10, 0, 0) + // PK < (100, 0, 0) + checkPartitionsPrimaryKey(3, table, partitions, + new byte[] { -10, 0, 0 }, new byte[] { 100, 0, 0 }); + + // PK >= (0, 0, 0) + // PK < (10, 10, 10) + checkPartitionsPrimaryKey(1, table, partitions, + new byte[] { 0, 0, 0 }, new byte[] { 10, 0, 0 }); + + // PK >= (0, 0, 0) + // PK < (10, 10, 11) + checkPartitionsPrimaryKey(1, table, partitions, + new byte[] { 0, 0, 0 }, new byte[] { 10, 0, 0 }); + + // PK < (0, 0, 0) + // PK >= (10, 10, 11) + checkPartitionsPrimaryKey(0, table, partitions, + new byte[] { 10, 0, 0 }, new byte[] { 0, 0, 0 }); + } + + @Test + public void testPrimaryKeyPrefixRangePruning() throws Exception { + // CREATE TABLE t + // (a INT8, b INT8, c INT8) + // PRIMARY KEY (a, b, c)) + // PARTITION BY RANGE (a, b) + // (PARTITION VALUES < (0, 0, 0)); + + ArrayList columns = new ArrayList<>(3); + columns.add(new ColumnSchema.ColumnSchemaBuilder("a", Type.INT8).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("b", Type.INT8).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c", Type.INT8).key(true).build()); + Schema schema = new Schema(columns); + + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(ImmutableList.of("a", "b")); + + PartialRow split = schema.newPartialRow(); + split.addByte("a", (byte) 0); + split.addByte("b", (byte) 0); + tableBuilder.addSplitRow(split); + + String tableName = "testPrimaryKeyPrefixRangePruning-" + System.currentTimeMillis(); + + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + List partitions = getTablePartitions(table); + + final byte min = Byte.MIN_VALUE; + final byte max = Byte.MAX_VALUE; + + // No bounds + checkPartitionsPrimaryKey(2, table, partitions, + null, null); + + // PK < (-1, min, min) + // TODO(KUDU-2178): prune the upper partition. + checkPartitionsPrimaryKey(2, table, partitions, + null, new byte[] { -1, min, min }); + + // PK < (0, 0, min) + // TODO(KUDU-2178): prune the upper partition. + checkPartitionsPrimaryKey(2, table, partitions, + null, new byte[] { 0, 0, min }); + + // PK < (0, 0, 0) + checkPartitionsPrimaryKey(2, table, partitions, + null, new byte[] { 0, 0, 0 }); + + // PK < (0, 1, min) + checkPartitionsPrimaryKey(2, table, partitions, + null, new byte[] { 0, 1, min }); + + // PK < (0, 1, 0) + checkPartitionsPrimaryKey(2, table, partitions, + null, new byte[] { 0, 1, 0 }); + + // PK < (max, max, min) + checkPartitionsPrimaryKey(2, table, partitions, + null, new byte[] { max, max, min }); + + // PK < (max, max, 0) + checkPartitionsPrimaryKey(2, table, partitions, + null, new byte[] { max, max, 0 }); + + // PK >= (0, 0, min) + // TODO(KUDU-2178): prune the lower partition. + checkPartitionsPrimaryKey(2, table, partitions, + new byte[] { 0, 0, min }, null); + + // PK >= (0, 0, 0) + // TODO(KUDU-2178): prune the lower partition. 
+ checkPartitionsPrimaryKey(2, table, partitions, + new byte[] { 0, 0, 0 }, null); + + // PK >= (0, -1, 0) + checkPartitionsPrimaryKey(2, table, partitions, + new byte[] { 0, -1, 0 }, null); + } + + @Test + public void testRangePartitionPruning() throws Exception { + // CREATE TABLE t + // (a INT8, b STRING, c INT8) + // PRIMARY KEY (a, b, c)) + // PARTITION BY RANGE (c, b) + // (PARTITION VALUES < (0, "m"), + // PARTITION (0, "m") <= VALUES < (10, "r") + // PARTITION (10, "r") <= VALUES); + + ColumnSchema a = new ColumnSchema.ColumnSchemaBuilder("a", Type.INT8).key(true).build(); + ColumnSchema b = new ColumnSchema.ColumnSchemaBuilder("b", Type.STRING).key(true).build(); + ColumnSchema c = new ColumnSchema.ColumnSchemaBuilder("c", Type.INT8).key(true).build(); + Schema schema = new Schema(ImmutableList.of(a, b, c)); + + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(ImmutableList.of("c", "b")); + + PartialRow split = schema.newPartialRow(); + split.addByte("c", (byte) 0); + split.addString("b", "m"); + tableBuilder.addSplitRow(split); + split.addByte("c", (byte) 10); + split.addString("b", "r"); + tableBuilder.addSplitRow(split); + + String tableName = "testRangePartitionPruning-" + System.currentTimeMillis(); + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + List partitions = getTablePartitions(table); + + // No Predicates + checkPartitions(3, 1, table, partitions); + + // c < -10 + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, -10)); + + // c = -10 + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, -10)); + + // c < 10 + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, 10)); + + // c < 100 + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, 100)); + + // c < MIN + checkPartitions(0, 0, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, Byte.MIN_VALUE)); + // c < MAX + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, Byte.MAX_VALUE)); + + // c >= -10 + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, -10)); + + // c >= 0 + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, -10)); + + // c >= 5 + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, 5)); + + // c >= 10 + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, 10)); + + // c >= 100 + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, 100)); + + // c >= MIN + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, Byte.MIN_VALUE)); + // c >= MAX + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, Byte.MAX_VALUE)); + + // c >= -10 + // c < 0 + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, -10), + KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, 0)); + + // c >= 5 + // c < 100 + checkPartitions(2, 1, table, partitions, + 
KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, 5), + KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, 100)); + + // b = "" + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(b, ComparisonOp.EQUAL, "")); + + // b >= "z" + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(b, ComparisonOp.GREATER_EQUAL, "z")); + + // b < "a" + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "a")); + + // b >= "m" + // b < "z" + checkPartitions(3, 1, table, partitions, + KuduPredicate.newComparisonPredicate(b, ComparisonOp.GREATER_EQUAL, "m"), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "z")); + + // c >= 10 + // b >= "r" + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, 10), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.GREATER_EQUAL, "r")); + + // c >= 10 + // b < "r" + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.GREATER_EQUAL, 10), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "r")); + + // c = 10 + // b < "r" + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 10), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "r")); + + // c < 0 + // b < "m" + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 0), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "m")); + + // c < 0 + // b < "z" + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, 0), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "z")); + + // c = 0 + // b = "m\0" + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 0), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.EQUAL, "m\0")); + + // c = 0 + // b < "m" + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 0), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "m")); + + // c = 0 + // b < "m\0" + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 0), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "m\0")); + + // c = 0 + // c = 2 + checkPartitions(0, 0, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 0), + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 2)); + + // c = MIN + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, Byte.MIN_VALUE)); + // c = MAX + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, Byte.MAX_VALUE)); + + // c IN (1, 2) + checkPartitions(1, 1, table, partitions, + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 1, (byte) 2))); + + // c IN (0, 1, 2) + checkPartitions(2, 1, table, partitions, + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1, (byte) 2))); + + // c IN (-10, 0) + // b < "m" + checkPartitions(1, 1, table, partitions, + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) -10, (byte) 0)), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "m")); + + // c IN (-10, 0) + // b < "m\0" + checkPartitions(2, 1, table, partitions, + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) -10, (byte) 
0)), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.LESS, "m\0")); + } + + @Test + public void testHashPartitionPruning() throws Exception { + // CREATE TABLE t + // (a INT8, b INT8, c INT8) + // PRIMARY KEY (a, b, c) + // PARTITION BY HASH (a) PARTITIONS 2, + // HASH (b, c) PARTITIONS 2; + + ColumnSchema a = new ColumnSchema.ColumnSchemaBuilder("a", Type.INT8).key(true).build(); + ColumnSchema b = new ColumnSchema.ColumnSchemaBuilder("b", Type.INT8).key(true).build(); + ColumnSchema c = new ColumnSchema.ColumnSchemaBuilder("c", Type.INT8).key(true).build(); + final Schema schema = new Schema(ImmutableList.of(a, b, c)); + + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(new ArrayList<>()); + tableBuilder.addHashPartitions(ImmutableList.of("a"), 2); + tableBuilder.addHashPartitions(ImmutableList.of("b", "c"), 2); + + String tableName = "testHashPartitionPruning-" + System.currentTimeMillis(); + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + List partitions = getTablePartitions(table); + + // No Predicates + checkPartitions(4, 1, table, partitions); + + // a = 0; + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(a, ComparisonOp.EQUAL, 0)); + + // a >= 0; + checkPartitions(4, 1, table, partitions, + KuduPredicate.newComparisonPredicate(a, ComparisonOp.GREATER_EQUAL, 0)); + + // a >= 0; + // a < 1; + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(a, ComparisonOp.GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(a, ComparisonOp.LESS, 1)); + + // a >= 0; + // a < 2; + checkPartitions(4, 1, table, partitions, + KuduPredicate.newComparisonPredicate(a, ComparisonOp.GREATER_EQUAL, 0), + KuduPredicate.newComparisonPredicate(a, ComparisonOp.LESS, 2)); + + // b = 1; + checkPartitions(4, 1, table, partitions, + KuduPredicate.newComparisonPredicate(b, ComparisonOp.EQUAL, 1)); + + // b = 1; + // c = 2; + checkPartitions(2, 2, table, partitions, + KuduPredicate.newComparisonPredicate(b, ComparisonOp.EQUAL, 1), + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 2)); + + // a = 0; + // b = 1; + // c = 2; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(a, ComparisonOp.EQUAL, 0), + KuduPredicate.newComparisonPredicate(b, ComparisonOp.EQUAL, 1), + KuduPredicate.newComparisonPredicate(c, ComparisonOp.EQUAL, 2)); + + // a IN (0, 10) + checkPartitions(4, 1, table, partitions, + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 10))); + } + + @Test + public void testInListHashPartitionPruning() throws Exception { + // CREATE TABLE t + // (a INT8, b INT8, c INT8) + // PRIMARY KEY (a, b, c) + // PARTITION BY HASH (a) PARTITIONS 3, + // HASH (b) PARTITIONS 3, + // HASH (c) PARTITIONS 3; + ColumnSchema a = new ColumnSchema.ColumnSchemaBuilder("a", Type.INT8).key(true).build(); + ColumnSchema b = new ColumnSchema.ColumnSchemaBuilder("b", Type.INT8).key(true).build(); + ColumnSchema c = new ColumnSchema.ColumnSchemaBuilder("c", Type.INT8).key(true).build(); + final Schema schema = new Schema(ImmutableList.of(a, b, c)); + + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(new ArrayList<>()); + tableBuilder.addHashPartitions(ImmutableList.of("a"), 3); + tableBuilder.addHashPartitions(ImmutableList.of("b"), 3); + tableBuilder.addHashPartitions(ImmutableList.of("c"), 3); + + String tableName = 
"testInListHashPartitionPruning-" + System.currentTimeMillis(); + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + List partitions = getTablePartitions(table); + + // a in [0, 1]; + checkPartitions(18, 2, table, partitions, + KuduPredicate.newInListPredicate(a, ImmutableList.of((byte) 0, (byte) 1))); + + // a in [0, 1, 8]; + checkPartitions(27, 1, table, partitions, + KuduPredicate.newInListPredicate(a, ImmutableList.of((byte) 0, (byte) 1, (byte) 8))); + + // b in [0, 1]; + checkPartitions(18, 6, table, partitions, + KuduPredicate.newInListPredicate(b, ImmutableList.of((byte) 0, (byte) 1))); + + // c in [0, 1]; + checkPartitions(18, 18, table, partitions, + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1))); + + // b in [0, 1], c in [0, 1]; + checkPartitions(12, 12, table, partitions, + KuduPredicate.newInListPredicate(b, ImmutableList.of((byte) 0, (byte) 1)), + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1))); + + // a in [0, 1], b in [0, 1], c in [0, 1]; + checkPartitions(8, 8, table, partitions, + KuduPredicate.newInListPredicate(a, ImmutableList.of((byte) 0, (byte) 1)), + KuduPredicate.newInListPredicate(b, ImmutableList.of((byte) 0, (byte) 1)), + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1))); + + // a in [0, 1, 2], b in [0, 1, 2], c in [0, 1, 2]; + checkPartitions(8, 8, table, partitions, + KuduPredicate.newInListPredicate(a, ImmutableList.of((byte) 0, (byte) 1, (byte)2)), + KuduPredicate.newInListPredicate(b, ImmutableList.of((byte) 0, (byte) 1, (byte)2)), + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1, (byte)2))); + + // a in [0, 1, 2, 3], b in [0, 1, 2, 3], c in [0, 1, 2, 3]; + checkPartitions(8, 8, table, partitions, + KuduPredicate.newInListPredicate( + a, ImmutableList.of((byte) 0, (byte) 1, (byte) 2, (byte) 3)), + KuduPredicate.newInListPredicate( + b, ImmutableList.of((byte) 0, (byte) 1, (byte) 2, (byte) 3)), + KuduPredicate.newInListPredicate( + c, ImmutableList.of((byte) 0, (byte) 1, (byte) 2, (byte) 3))); + + // The following test cases, we give more tests to make sure its correctess. 
+ { + List> expectedList = new ArrayList<>(); + expectedList.add(ImmutableList.of(1, 1)); + expectedList.add(ImmutableList.of(8, 8)); + expectedList.add(ImmutableList.of(8, 8)); + expectedList.add(ImmutableList.of(8, 8)); + expectedList.add(ImmutableList.of(27, 1)); + expectedList.add(ImmutableList.of(27, 1)); + expectedList.add(ImmutableList.of(27, 1)); + expectedList.add(ImmutableList.of(27, 1)); + expectedList.add(ImmutableList.of(27, 1)); + expectedList.add(ImmutableList.of(27, 1)); + + for (int size = 1; size <= 10; size++) { + int columnCount = schema.getColumnCount(); + List> testCases = new ArrayList<>(); + for (int i = 0; i < columnCount; i++) { + List testCase = new ArrayList<>(); + for (int j = 0; j < size; j++) { + testCase.add((byte) j); + } + testCases.add(testCase); + } + + KuduScanner.KuduScannerBuilder scanBuilder = client.newScannerBuilder(table); + + List columnSchemas = new ArrayList<>(); + columnSchemas.add(a); + columnSchemas.add(b); + columnSchemas.add(c); + KuduPredicate[] predicates = new KuduPredicate[3]; + for (int i = 0; i < 3; i++) { + predicates[i] = KuduPredicate.newInListPredicate(columnSchemas.get(i), + ImmutableList.copyOf(testCases.get(i))); + scanBuilder.addPredicate(predicates[i]); + } + checkPartitions(expectedList.get(size - 1).get(0), + expectedList.get(size - 1).get(1), table, partitions, predicates); + Schema scanSchema = scanBuilder.table.getSchema(); + PartitionSchema partitionSchema = scanBuilder.table.getPartitionSchema(); + + List hashBucketSchemas = + partitionSchema.getHashBucketSchemas(); + assertEquals(columnCount, hashBucketSchemas.size()); + for (PartitionSchema.HashBucketSchema hashSchema : hashBucketSchemas) { + BitSet oldBitset = pruneHashComponent(scanSchema, hashSchema, + scanBuilder.predicates); + BitSet newBitset = PartitionPruner.pruneHashComponentV2ForTest(scanSchema, hashSchema, + scanBuilder.predicates); + Assert.assertEquals(oldBitset, newBitset); + } + } + } + } + + @Test + public void testMultiColumnInListHashPruning() throws Exception { + // CREATE TABLE t + // (a INT8, b INT8, c INT8) + // PRIMARY KEY (a, b, c) + // PARTITION BY HASH (a) PARTITIONS 3, + // HASH (b, c) PARTITIONS 3; + ColumnSchema a = new ColumnSchema.ColumnSchemaBuilder("a", Type.INT8).key(true).build(); + ColumnSchema b = new ColumnSchema.ColumnSchemaBuilder("b", Type.INT8).key(true).build(); + ColumnSchema c = new ColumnSchema.ColumnSchemaBuilder("c", Type.INT8).key(true).build(); + final Schema schema = new Schema(ImmutableList.of(a, b, c)); + + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(new ArrayList<>()); + tableBuilder.addHashPartitions(ImmutableList.of("a"), 3); + tableBuilder.addHashPartitions(ImmutableList.of("b", "c"), 3); + + String tableName = "testMultiColumnInListHashPartitionPruning-" + System.currentTimeMillis(); + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + List partitions = getTablePartitions(table); + + // a in [0, 1]; + checkPartitions(6, 2, table, partitions, + KuduPredicate.newInListPredicate(a, ImmutableList.of((byte) 0, (byte) 1))); + + // a in [0, 1, 8]; + checkPartitions(9, 1, table, partitions, + KuduPredicate.newInListPredicate(a, ImmutableList.of((byte) 0, (byte) 1, (byte) 8))); + + // b in [0, 1]; + checkPartitions(9, 1, table, partitions, + KuduPredicate.newInListPredicate(b, ImmutableList.of((byte) 0, (byte) 1))); + + // c in [0, 1]; + checkPartitions(9, 1, table, partitions, + 
KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1))); + + // b in [0, 1], c in [0, 1] + // (0, 0) in bucket 2 + // (0, 1) in bucket 2 + // (1, 0) in bucket 1 + // (1, 1) in bucket 0 + checkPartitions(9, 1, table, partitions, + KuduPredicate.newInListPredicate(b, ImmutableList.of((byte) 0, (byte) 1)), + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1))); + + // b = 0, c in [0, 1] + checkPartitions(3, 3, table, partitions, + KuduPredicate.newComparisonPredicate(b, ComparisonOp.EQUAL, 0), + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1))); + + // b = 1, c in [0, 1] + checkPartitions(6, 6, table, partitions, + KuduPredicate.newComparisonPredicate(b, ComparisonOp.EQUAL, 1), + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1))); + + // a in [0, 1], b in [0, 1], c in [0, 1]; + checkPartitions(6, 2, table, partitions, + KuduPredicate.newInListPredicate(a, ImmutableList.of((byte) 0, (byte) 1)), + KuduPredicate.newInListPredicate(b, ImmutableList.of((byte) 0, (byte) 1)), + KuduPredicate.newInListPredicate(c, ImmutableList.of((byte) 0, (byte) 1))); + } + + // This unit test verifies the correctness of the newer algorithm against the older one. + // It generates random lists of values that should not cause an OOM condition and checks + // that the two algorithms produce the same results. It also compares their performance. + @Test + public void testInListHashPartitionPruningUsingLargeList() throws Exception { + // To test the normal case, where the test should not run out of memory, + // use totalCount = 100 and inListMaxLength = 10. + List<ReturnValueHelper> returnValues = this.prepareForLargeInListPredicates(client, + "testInListHashPartitionPruningUsingLargeList", 100, 10); + for (ReturnValueHelper returnValue : returnValues) { + long v1CostMillis = 0; + long v2CostMillis = 0; + long combinationCount = 1; + for (PartitionSchema.HashBucketSchema hashSchema : + returnValue.partitionSchema.getHashBucketSchemas()) { + long startTime = System.currentTimeMillis(); + final BitSet oldBitset = pruneHashComponent( + returnValue.schema, hashSchema, returnValue.predicates); + v1CostMillis += (System.currentTimeMillis() - startTime); + startTime = System.currentTimeMillis(); + final BitSet newBitset = PartitionPruner.pruneHashComponentV2ForTest( + returnValue.schema, hashSchema, returnValue.predicates); + v2CostMillis += (System.currentTimeMillis() - startTime); + Assert.assertEquals(oldBitset, newBitset); + combinationCount *= returnValue.predicates.size(); + } + // The v2 algorithm is more efficient than the v1 algorithm; the log line below + // allows comparing the two. In practice v2 (new) is roughly 100x faster than v1 (old). + if (v2CostMillis != 0 && v1CostMillis != 0) { + LOG.info("combination_count: {}, old algorithm " + + "cost: {}ms, new algorithm cost: {}ms, speedup: {}", + combinationCount, v1CostMillis, v2CostMillis, + (double) v1CostMillis / v2CostMillis); + } + } + } + + // This unit test verifies that very long in-list predicates, which can cause an OOM + // condition in the v1 (older) algorithm, are handled by the v2 (newer) algorithm. + // For details on testing the OOM condition, see the in-line + // TODO comment at the end of this test. 
+ @Test + public void testInListHashPartitionPruningUsingLargeListOOM() throws Exception { + // To test OOM case, set totalCount = 10 and inListMaxLength = 100; + List returnValues = this.prepareForLargeInListPredicates(client, + "testInListHashPartitionPruningUsingLargeListOOM", 10, 100); + for (ReturnValueHelper returnValue : returnValues) { + for (PartitionSchema.HashBucketSchema hashSchema : + returnValue.partitionSchema.getHashBucketSchemas()) { + // TODO(duyuqi) + // How to add a test case for the oom? + // Comments: + // the org.apache.kudu.client.TestPartitionPruner > + // testInListHashPartitionPruningUsingLargeListOOM FAILED + // java.lang.OutOfMemoryError: GC overhead limit exceeded + // PartitionPruner.pruneHashComponentV1ForTest(scanSchema, hashSchema, + // scanBuilder.predicates); + PartitionPruner.pruneHashComponentV2ForTest(returnValue.schema, hashSchema, + returnValue.predicates); + } + } + } + + @Test + public void testPruning() throws Exception { + // CREATE TABLE timeseries + // (host STRING, metric STRING, timestamp UNIXTIME_MICROS, value DOUBLE) + // PRIMARY KEY (host, metric, time) + // DISTRIBUTE BY + // RANGE(time) + // (PARTITION VALUES < 10, + // PARTITION VALUES >= 10); + // HASH (host, metric) 2 PARTITIONS; + + ColumnSchema host = + new ColumnSchema.ColumnSchemaBuilder("host", Type.STRING).key(true).build(); + ColumnSchema metric = + new ColumnSchema.ColumnSchemaBuilder("metric", Type.STRING).key(true).build(); + ColumnSchema timestamp = + new ColumnSchema.ColumnSchemaBuilder("timestamp", Type.UNIXTIME_MICROS) + .key(true).build(); + ColumnSchema value = new ColumnSchema.ColumnSchemaBuilder("value", Type.DOUBLE).build(); + Schema schema = new Schema(ImmutableList.of(host, metric, timestamp, value)); + + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(ImmutableList.of("timestamp")); + + PartialRow split = schema.newPartialRow(); + split.addLong("timestamp", 10); + tableBuilder.addSplitRow(split); + + tableBuilder.addHashPartitions(ImmutableList.of("host", "metric"), 2); + + String tableName = "testPruning-" + System.currentTimeMillis(); + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + List partitions = getTablePartitions(table); + + // No Predicates + checkPartitions(4, 1, table, partitions); + + // host = "a" + checkPartitions(4, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a")); + + // host = "a" + // metric = "a" + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a")); + + // host = "a" + // metric = "a" + // timestamp >= 9; + checkPartitions(2, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 9)); + + // host = "a" + // metric = "a" + // timestamp >= 10; + // timestamp < 20; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 10), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.LESS, 20)); + + // host = "a" + // metric = "a" + // timestamp < 10; + checkPartitions(1, 
1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.LESS, 10)); + + // host = "a" + // metric = "a" + // timestamp >= 10; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 10)); + + // host = "a" + // metric = "a" + // timestamp = 10; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.EQUAL, 10)); + + byte[] hash1 = new byte[] { 0, 0, 0, 1 }; + + // partition key < (hash=1) + checkPartitions(2, 1, table, partitions, null, hash1); + + // partition key >= (hash=1) + checkPartitions(2, 1, table, partitions, hash1, null); + + // timestamp = 10 + // partition key < (hash=1) + checkPartitions(1, 1, table, partitions, null, hash1, + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.EQUAL, 10)); + + // timestamp = 10 + // partition key >= (hash=1) + checkPartitions(1, 1, table, partitions, hash1,null, + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.EQUAL, 10)); + + // timestamp IN (0, 9) + // host = "a" + // metric IN ("foo", "baz") + checkPartitions(1, 1, table, partitions, + KuduPredicate.newInListPredicate(timestamp, ImmutableList.of(0L, 9L)), + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newInListPredicate(metric, ImmutableList.of("foo", "baz"))); + + // timestamp IN (10, 100) + checkPartitions(2, 2, table, partitions, + KuduPredicate.newInListPredicate(timestamp, ImmutableList.of(10L, 100L))); + + // timestamp IN (9, 10) + checkPartitions(4, 2, table, partitions, + KuduPredicate.newInListPredicate(timestamp, ImmutableList.of(9L, 10L))); + + // timestamp IS NOT NULL + checkPartitions(4, 1, table, partitions, + KuduPredicate.newIsNotNullPredicate(timestamp)); + + // timestamp IS NULL + checkPartitions(0, 0, table, partitions, + KuduPredicate.newIsNullPredicate(timestamp)); + } + + @Test(timeout = 100000) + public void testPruningWithCustomHashSchemas() throws Exception { + // CREATE TABLE timeseries + // (host STRING, metric STRING, timestamp UNIXTIME_MICROS, value DOUBLE) + // PRIMARY KEY (host, metric, timestamp) + // + // RANGE(timestamp) + // (PARTITION VALUES >= 0, + // PARTITION VALUES < 100) + // HASH (host, metric) 2 PARTITIONS, + // + // RANGE(timestamp) + // (PARTITION VALUES >= 100, + // PARTITION VALUES < 200) + // HASH (host) 5 PARTITIONS + + ColumnSchema host = + new ColumnSchema.ColumnSchemaBuilder("host", Type.STRING).key(true).build(); + ColumnSchema metric = + new ColumnSchema.ColumnSchemaBuilder("metric", Type.STRING).key(true).build(); + ColumnSchema timestamp = + new ColumnSchema.ColumnSchemaBuilder("timestamp", Type.UNIXTIME_MICROS) + .key(true).build(); + ColumnSchema value = new ColumnSchema.ColumnSchemaBuilder("value", Type.DOUBLE).build(); + final Schema schema = new Schema(ImmutableList.of(host, metric, timestamp, value)); + + CreateTableOptions tableBuilder = new CreateTableOptions(); + tableBuilder.setRangePartitionColumns(ImmutableList.of("timestamp")); + + // Add range partition with table-wide hash schema. 
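+ // This range covers timestamps [0, 100) and uses the table-wide HASH (host, metric) + // PARTITIONS 2 schema added below.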
+ { + PartialRow lower = schema.newPartialRow(); + lower.addLong("timestamp", 0); + PartialRow upper = schema.newPartialRow(); + upper.addLong("timestamp", 100); + tableBuilder.addRangePartition(lower, upper); + } + + // Add range partition with custom hash schema. + { + PartialRow lower = schema.newPartialRow(); + lower.addLong("timestamp", 100); + PartialRow upper = schema.newPartialRow(); + upper.addLong("timestamp", 200); + + RangePartitionWithCustomHashSchema rangePartition = + new RangePartitionWithCustomHashSchema( + lower, + upper, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND); + rangePartition.addHashPartitions(ImmutableList.of("host"), 5, 0); + + tableBuilder.addRangePartition(rangePartition); + } + + // Add table-wide hash schema. + tableBuilder.addHashPartitions(ImmutableList.of("host", "metric"), 2); + + String tableName = "testPruningCHS"; + client.createTable(tableName, schema, tableBuilder); + KuduTable table = client.openTable(tableName); + + final List partitions = getTablePartitions(table); + assertEquals(7, partitions.size()); + + // No Predicates + checkPartitions(7, 9, table, partitions); + + checkPartitions(7, 7, table, partitions, + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 50), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.LESS, 150)); + + // host = "a" + // 2 tablets from the HASH(host, metric) range and 1 from the HASH(host) range. + checkPartitions(3, 5, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a")); + + // host = "a" + // metric = "a" + checkPartitions(2, 3, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a")); + + // host = "a" + // metric = "a" + // timestamp >= 9; + checkPartitions(2, 3, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 9)); + + // host = "a" + // metric = "a" + // timestamp >= 10; + // timestamp < 20; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 10), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.LESS, 20)); + + // host = "a" + // metric = "a" + // timestamp >= 100; + // timestamp < 200; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 10), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.LESS, 20)); + + // host = "a" + // metric = "a" + // timestamp < 10; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.LESS, 10)); + + // host = "a" + // metric = "a" + // timestamp < 100; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + 
KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.LESS, 100)); + + // host = "a" + // metric = "a" + // timestamp >= 10; + checkPartitions(2, 3, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 10)); + + // host = "a" + // metric = "a" + // timestamp >= 100; + checkPartitions(1, 2, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.GREATER_EQUAL, 100)); + + // host = "a" + // metric = "a" + // timestamp = 100; + checkPartitions(1, 1, table, partitions, + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(metric, ComparisonOp.EQUAL, "a"), + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.EQUAL, 100)); + + final byte[] hash1 = new byte[] { 0, 0, 0, 1 }; + + // partition key < (hash=1) + // scan partitions: 1 + 1 + 1 + checkPartitions(2, 3, table, partitions, null, hash1); + + // partition key >= (hash=1) + // scan partitions: 1 + 4 + 1 + checkPartitions(5, 6, table, partitions, hash1, null); + + // timestamp = 10 + // partition key < (hash=1) + // scan partitions: 0 + 1 + 0 + checkPartitions(1, 1, table, partitions, null, hash1, + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.EQUAL, 10)); + + // timestamp = 10 + // partition key >= (hash=1) + checkPartitions(1, 1, table, partitions, hash1, null, + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.EQUAL, 10)); + + // timestamp = 100 + // partition key >= (hash=1) + checkPartitions(4, 4, table, partitions, hash1, null, + KuduPredicate.newComparisonPredicate(timestamp, ComparisonOp.EQUAL, 100)); + + // timestamp IN (99, 100) + // host = "a" + // metric IN ("foo", "baz") + checkPartitions(2, 2, table, partitions, + KuduPredicate.newInListPredicate(timestamp, ImmutableList.of(99L, 100L)), + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newInListPredicate(metric, ImmutableList.of("foo", "baz"))); + + // timestamp IN (100, 199) + // host = "a" + // metric IN ("foo", "baz") + checkPartitions(1, 1, table, partitions, + KuduPredicate.newInListPredicate(timestamp, ImmutableList.of(100L, 199L)), + KuduPredicate.newComparisonPredicate(host, ComparisonOp.EQUAL, "a"), + KuduPredicate.newInListPredicate(metric, ImmutableList.of("foo", "baz"))); + + // timestamp IN (0, 10) + checkPartitions(2, 2, table, partitions, + KuduPredicate.newInListPredicate(timestamp, ImmutableList.of(0L, 10L))); + + // timestamp IN (100, 110) + checkPartitions(5, 5, table, partitions, + KuduPredicate.newInListPredicate(timestamp, ImmutableList.of(100L, 110L))); + + // timestamp IN (99, 100) + checkPartitions(7, 7, table, partitions, + KuduPredicate.newInListPredicate(timestamp, ImmutableList.of(99L, 100L))); + + // timestamp IS NOT NULL + checkPartitions(7, 9, table, partitions, + KuduPredicate.newIsNotNullPredicate(timestamp)); + + // timestamp IS NULL + checkPartitions(0, 0, table, partitions, + KuduPredicate.newIsNullPredicate(timestamp)); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRemoteTablet.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRemoteTablet.java new file mode 100644 index 
0000000000..c3e063abc7 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRemoteTablet.java @@ -0,0 +1,299 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.List; + +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.consensus.Metadata; +import org.apache.kudu.test.ProtobufUtils; +import org.apache.kudu.test.junit.RetryRule; + +public class TestRemoteTablet { + private static final String kClientLocation = "/fake-client"; + private static final String kLocation = "/fake-noclient"; + private static final String kNoLocation = ""; + private static final String[] kUuids = { "uuid-0", "uuid-1", "uuid-2" }; + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + public void testLeaderLastRemovedLast() { + RemoteTablet tablet = getTablet(2); + + // Demote the wrong leader, no-op. + assertEquals(kUuids[2], tablet.getLeaderServerInfo().getUuid()); + tablet.demoteLeader(kUuids[1]); + assertEquals(kUuids[2], tablet.getLeaderServerInfo().getUuid()); + + // Tablet at server 1 was deleted. + assertTrue(tablet.removeTabletClient(kUuids[1])); + assertEquals(kUuids[2], tablet.getLeaderServerInfo().getUuid()); + + // Simulate another thread trying to remove 1. + assertFalse(tablet.removeTabletClient(kUuids[1])); + + // Tablet at server 0 was deleted. + assertTrue(tablet.removeTabletClient(kUuids[0])); + assertEquals(kUuids[2], tablet.getLeaderServerInfo().getUuid()); + + // Leader was demoted. + tablet.demoteLeader(kUuids[2]); + assertNull(tablet.getLeaderServerInfo()); + + // Simulate another thread doing the same. + tablet.demoteLeader(kUuids[2]); + assertNull(tablet.getLeaderServerInfo()); + } + + @Test + public void testLeaderLastRemovedFirst() { + RemoteTablet tablet = getTablet(2); + + // Test we can remove it. + assertTrue(tablet.removeTabletClient("uuid-2")); + assertNull(tablet.getLeaderServerInfo()); + + // Test demoting it doesn't break anything. + tablet.demoteLeader("uuid-2"); + assertNull(tablet.getLeaderServerInfo()); + } + + @Test + public void testLeaderFirst() { + RemoteTablet tablet = getTablet(0); + + // Test we can remove it. + assertTrue(tablet.removeTabletClient("uuid-0")); + assertNull(tablet.getLeaderServerInfo()); + + // Test demoting it doesn't break anything. 
+ tablet.demoteLeader("uuid-0"); + assertNull(tablet.getLeaderServerInfo()); + + // Test removing a server with no leader doesn't break. + assertTrue(tablet.removeTabletClient("uuid-2")); + } + + @Test + public void testLocalReplica() { + { + // Let's examine a tablet where the first UUID is local to the client, + // but no UUID is in the same location. + RemoteTablet tablet = getTablet(0, 0, -1); + + // If the client has no location, we should pick the local server. + assertEquals(kUuids[0], tablet.getClosestServerInfo(kNoLocation).getUuid()); + + // NOTE: if the client did have a location, because the test replicas are + // assigned a different default location, they aren't considered local, + // so we would select one at random. + } + + { + // Let's examine a tablet where the first UUID is local to the client, + // and the second is in the same location. + RemoteTablet tablet = getTablet(0, 0, 1); + + // If the client has no location, we should pick the local server. + assertEquals(kUuids[0], tablet.getClosestServerInfo(kNoLocation).getUuid()); + + // If the client does have a location, we should pick the one in the same + // location. + assertEquals(kUuids[1], tablet.getClosestServerInfo(kClientLocation).getUuid()); + } + + { + // Let's examine a tablet where the first UUID is local to the client and + // is also in the same location. + RemoteTablet tablet = getTablet(0, 0, 0); + + // If the client has no location, we should pick the local server. + assertEquals(kUuids[0], tablet.getClosestServerInfo(kNoLocation).getUuid()); + + // If the client does have a location, we should pick the one in the same + // location. + assertEquals(kUuids[0], tablet.getClosestServerInfo(kClientLocation).getUuid()); + } + } + + @Test + public void testNoLocalOrSameLocationReplica() { + RemoteTablet tablet = getTablet(0, -1, -1); + + // We just care about getting one back. + assertNotNull(tablet.getClosestServerInfo(kClientLocation).getUuid()); + } + + @Test + public void testReplicaWithNoValidLocation() { + RemoteTablet tablet = getTablet(0, 1, 2); + + // Test removing all tablet servers doesn't break. + for (String uuid : kUuids) { + assertTrue(tablet.removeTabletClient(uuid)); + } + assertNull(tablet.getLeaderServerInfo()); + assertNull(tablet.getClosestServerInfo(kNoLocation)); + assertNull(tablet.getClosestServerInfo(kClientLocation)); + } + + @Test + public void testReplicaSelection() { + { + RemoteTablet tablet = getTablet(0, 1, 2); + + // LEADER_ONLY picks the leader even if there's a local replica. + assertEquals(kUuids[0], + tablet.getReplicaSelectedServerInfo(ReplicaSelection.LEADER_ONLY, kClientLocation) + .getUuid()); + + // Since there are locations assigned, CLOSEST_REPLICA picks the replica + // in the same location, even if there's a local one. + assertEquals(kUuids[2], + tablet.getReplicaSelectedServerInfo(ReplicaSelection.CLOSEST_REPLICA, kClientLocation) + .getUuid()); + } + + { + RemoteTablet tablet = getTablet(0, 1, -1); + + // LEADER_ONLY picks the leader even if there's a local replica. + assertEquals(kUuids[0], + tablet.getReplicaSelectedServerInfo(ReplicaSelection.LEADER_ONLY, kClientLocation) + .getUuid()); + + // NOTE: the test replicas are assigned a default location. So, even if + // there are local replicas, because they are in different locations than + // the client, with CLOSEST_REPLICA, a replica is chosen at random. + } + + { + RemoteTablet tablet = getTablet(0, -1, 1); + + // LEADER_ONLY picks the leader even if there's a replica with the same location. 
+ assertEquals(kUuids[0], + tablet.getReplicaSelectedServerInfo(ReplicaSelection.LEADER_ONLY, kClientLocation) + .getUuid()); + + // CLOSEST_REPLICA picks the replica in the same location. + assertEquals(kUuids[1], + tablet.getReplicaSelectedServerInfo(ReplicaSelection.CLOSEST_REPLICA, kClientLocation) + .getUuid()); + } + } + + // AsyncKuduClient has methods like scanNextRows, keepAlive, and closeScanner that rely on + // RemoteTablet.getReplicaSelectedServerInfo to be deterministic given the same state. + // This ensures follow up calls are routed to the same server with the scanner open. + // This test ensures that remains true. + @Test + public void testGetReplicaSelectedServerInfoDeterminism() { + // There's a local leader replica. + RemoteTablet tabletWithLocal = getTablet(0, 0, 0); + verifyGetReplicaSelectedServerInfoDeterminism(tabletWithLocal); + + // There's a leader in the same location as the client. + RemoteTablet tabletWithSameLocation = getTablet(0, -1, 0); + verifyGetReplicaSelectedServerInfoDeterminism(tabletWithSameLocation); + + // There's no local replica or replica in the same location. + RemoteTablet tabletWithRemote = getTablet(0, -1, -1); + verifyGetReplicaSelectedServerInfoDeterminism(tabletWithRemote); + } + + private void verifyGetReplicaSelectedServerInfoDeterminism(RemoteTablet tablet) { + String init = tablet + .getReplicaSelectedServerInfo(ReplicaSelection.CLOSEST_REPLICA, kClientLocation) + .getUuid(); + for (int i = 0; i < 10; i++) { + String next = tablet + .getReplicaSelectedServerInfo(ReplicaSelection.CLOSEST_REPLICA, kClientLocation) + .getUuid(); + assertEquals("getReplicaSelectedServerInfo was not deterministic", init, next); + } + } + + @Test + public void testToString() { + RemoteTablet tablet = getTablet(0, 1, -1); + assertEquals("fake tablet@[uuid-0(host:1000)[L],uuid-1(host:1001),uuid-2(host:1002)]", + tablet.toString()); + } + + private RemoteTablet getTablet(int leaderIndex) { + return getTablet(leaderIndex, -1, -1); + } + + // Returns a three-replica remote tablet that considers the given indices of + // replicas to be leader, local to the client, and in the same location. + static RemoteTablet getTablet(int leaderIndex, + int localReplicaIndex, + int sameLocationReplicaIndex) { + return getTablet( + leaderIndex, localReplicaIndex, sameLocationReplicaIndex, + AsyncKuduClient.EMPTY_ARRAY, AsyncKuduClient.EMPTY_ARRAY); + } + + static RemoteTablet getTablet(int leaderIndex, + int localReplicaIndex, + int sameLocationReplicaIndex, + byte[] partitionKeyStart, + byte[] partitionKeyEnd) { + Partition partition = ProtobufHelper.pbToPartition( + ProtobufUtils.getFakePartitionPB(partitionKeyStart, partitionKeyEnd).build()); + List replicas = new ArrayList<>(); + List servers = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + InetAddress addr; + try { + if (i == localReplicaIndex) { + addr = InetAddress.getByName("127.0.0.1"); + } else { + addr = InetAddress.getByName("1.2.3.4"); + } + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + + String uuid = kUuids[i]; + String location = i == sameLocationReplicaIndex ? kClientLocation : kLocation; + servers.add(new ServerInfo(uuid, + new HostAndPort("host", 1000 + i), + addr, + location)); + Metadata.RaftPeerPB.Role role = leaderIndex == i ? 
Metadata.RaftPeerPB.Role.LEADER : + Metadata.RaftPeerPB.Role.FOLLOWER; + replicas.add(new LocatedTablet.Replica("host", i, role, null)); + } + + return new RemoteTablet("fake table", "fake tablet", partition, replicas, servers); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRequestTracker.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRequestTracker.java new file mode 100644 index 0000000000..234bf078df --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRequestTracker.java @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; + +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicBoolean; + +import com.google.common.collect.Lists; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.test.junit.RetryRule; + +public class TestRequestTracker { + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test(timeout = 10000) + public void test() { + RequestTracker tracker = new RequestTracker("test"); + + // A new tracker should have no incomplete RPCs. + assertEquals(RequestTracker.NO_SEQ_NO, tracker.firstIncomplete()); + + int max = 10; + + for (int i = 0; i < max; i++) { + tracker.newSeqNo(); + } + + // The first RPC is the incomplete one. + assertEquals(1, tracker.firstIncomplete()); + + // Mark the first as complete, incomplete should advance by 1. + tracker.rpcCompleted(1); + assertEquals(2, tracker.firstIncomplete()); + + // Mark the RPC in the middle as complete, first incomplete doesn't change. + tracker.rpcCompleted(5); + assertEquals(2, tracker.firstIncomplete()); + + // Mark 2-4 inclusive as complete. + for (int i = 2; i <= 4; i++) { + tracker.rpcCompleted(i); + } + + assertEquals(6, tracker.firstIncomplete()); + + // Get a few more sequence numbers. + long lastSeqNo = 0; + for (int i = max / 2; i <= max; i++) { + lastSeqNo = tracker.newSeqNo(); + } + + // Mark them all as complete except the last one. + while (tracker.firstIncomplete() != lastSeqNo) { + tracker.rpcCompleted(tracker.firstIncomplete()); + } + + assertEquals(lastSeqNo, tracker.firstIncomplete()); + tracker.rpcCompleted(lastSeqNo); + + // Test that we get back to NO_SEQ_NO after marking them all. 
+    assertEquals(RequestTracker.NO_SEQ_NO, tracker.firstIncomplete());
+  }
+
+  private static class Checker {
+    long curIncomplete = 0;
+
+    public synchronized void check(long seqNo, long firstIncomplete) {
+      Assert.assertTrue("should not send a seq number that was previously marked complete",
+          seqNo >= curIncomplete);
+      curIncomplete = Math.max(firstIncomplete, curIncomplete);
+    }
+  }
+
+  @Test(timeout = 30000)
+  public void testMultiThreaded() throws InterruptedException, ExecutionException {
+    final AtomicBoolean done = new AtomicBoolean(false);
+    final RequestTracker rt = new RequestTracker("fake id");
+    final Checker checker = new Checker();
+    ExecutorService exec = Executors.newCachedThreadPool();
+    List<Future<Void>> futures = Lists.newArrayList();
+    for (int i = 0; i < 16; i++) {
+      futures.add(exec.submit(new Callable<Void>() {
+        @Override
+        public Void call() {
+          while (!done.get()) {
+            long seqNo = rt.newSeqNo();
+            long incomplete = rt.firstIncomplete();
+            checker.check(seqNo, incomplete);
+            rt.rpcCompleted(seqNo);
+          }
+          return null;
+        }
+      }));
+    }
+    Thread.sleep(5000);
+    done.set(true);
+    for (Future<Void> f : futures) {
+      f.get();
+    }
+  }
+}
diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRowErrors.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRowErrors.java
new file mode 100644
index 0000000000..d476079e9d
--- /dev/null
+++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRowErrors.java
@@ -0,0 +1,189 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.createFourTabletsTableWithNineRows; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.ImmutableList; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.util.DateUtil; + +public class TestRowErrors { + + private static final Schema basicSchema = getBasicSchema(); + + private KuduTable table; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Test(timeout = 100000) + public void singleTabletTest() throws Exception { + String tableName = TestRowErrors.class.getName() + "-" + System.currentTimeMillis(); + harness.getClient().createTable(tableName, basicSchema, getBasicCreateTableOptions()); + table = harness.getClient().openTable(tableName); + AsyncKuduSession session = harness.getAsyncClient().newSession(); + + // Insert 3 rows to play with. + for (int i = 0; i < 3; i++) { + session.apply(createInsert(i)).join(DEFAULT_SLEEP); + } + + // Try a single dupe row insert with AUTO_FLUSH_SYNC. + Insert dupeForZero = createInsert(0); + OperationResponse resp = session.apply(dupeForZero).join(DEFAULT_SLEEP); + assertTrue(resp.hasRowError()); + assertTrue(resp.getRowError().getOperation() == dupeForZero); + + // Now try inserting two dupes and one good row, make sure we get only two errors back. + dupeForZero = createInsert(0); + Insert dupeForTwo = createInsert(2); + session.setFlushMode(AsyncKuduSession.FlushMode.MANUAL_FLUSH); + session.apply(dupeForZero); + session.apply(dupeForTwo); + session.apply(createInsert(4)); + + List responses = session.flush().join(DEFAULT_SLEEP); + List errors = OperationResponse.collectErrors(responses); + assertEquals(2, errors.size()); + assertTrue(errors.get(0).getOperation() == dupeForZero); + assertTrue(errors.get(1).getOperation() == dupeForTwo); + } + + /** + * Test collecting errors from multiple tablets. 
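+   * <p>With AUTO_FLUSH_BACKGROUND the session does not surface per-row failures on
+   * {@code apply()}; they are buffered and retrieved via {@code getPendingErrors()},
+   * which also clears the buffer (hence the {@code countPendingErrors() == 0} assertion).
+   *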
+   * @throws Exception
+   */
+  @Test(timeout = 100000)
+  public void multiTabletTest() throws Exception {
+    String tableName = TestRowErrors.class.getName() + "-" + System.currentTimeMillis();
+    createFourTabletsTableWithNineRows(harness.getAsyncClient(), tableName, DEFAULT_SLEEP);
+    table = harness.getClient().openTable(tableName);
+    KuduSession session = harness.getClient().newSession();
+    session.setFlushMode(KuduSession.FlushMode.AUTO_FLUSH_BACKGROUND);
+
+    final int dupRows = 3;
+    session.apply(createInsert(12));
+    session.apply(createInsert(22));
+    session.apply(createInsert(32));
+
+    session.flush();
+
+    RowErrorsAndOverflowStatus reos = session.getPendingErrors();
+    assertEquals(dupRows, reos.getRowErrors().length);
+    assertEquals(0, session.countPendingErrors());
+  }
+
+  @Test(timeout = 100000)
+  public void readableRowErrorTest() throws Exception {
+    KuduSession session = harness.getClient().newSession();
+    Map<Type, String> dataByType = new HashMap<>();
+    dataByType.put(Type.INT32, "10000");
+    dataByType.put(Type.DATE, "1970-01-01");
+    dataByType.put(Type.STRING, "fun with ütf");
+    dataByType.put(Type.BINARY, "[0, 1, 2, 3, 4]");
+    int anotherColData = 101;
+    Type[] types = new Type[] {Type.INT32, Type.DATE, Type.STRING, Type.BINARY};
+    for (Type dataType : types) {
+      flushDifferentTypeData(dataType, dataByType, anotherColData, session);
+      for (RowError re : session.getPendingErrors().getRowErrors()) {
+        String cmpStr = String.format("Row error for row=(%s c0=%s, int32 c1=%d)",
+            dataType.getName(), dataByType.get(dataType), anotherColData);
+        if (dataType == Type.STRING || dataType == Type.BINARY) {
+          cmpStr = String.format("Row error for row=(%s c0=\"%s\", int32 c1=%d)",
+              dataType.getName(), dataByType.get(dataType), anotherColData);
+        }
+        assertTrue(re.toString().contains(cmpStr));
+      }
+    }
+  }
+
+  private void flushDifferentTypeData(Type dataType, Map<Type, String> dataByType,
+                                      int anotherColData, KuduSession session)
+      throws Exception {
+    String tableName = TestRowErrors.class.getName() + "-" + System.currentTimeMillis();
+    CreateTableOptions createOptions = new CreateTableOptions()
+        .addHashPartitions(ImmutableList.of("c0"), 2, 0);
+    ArrayList<ColumnSchema> columns = new ArrayList<>();
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c0", dataType)
+        .nullable(false)
+        .key(true)
+        .build());
+    columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32)
+        .nullable(false)
+        .build());
+    Schema schema = new Schema(columns);
+
+    KuduClient client = harness.getClient();
+    client.createTable(tableName, schema, createOptions);
+    table = client.openTable(tableName);
+
+    session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND);
+
+    Update update = table.newUpdate();
+    PartialRow row = update.getRow();
+    switch (dataType) {
+      // Type.INT32.
+      case INT32:
+        row.addInt("c0", Integer.parseInt(dataByType.get(dataType)));
+        break;
+      // Type.DATE.
+      case DATE:
+        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
+        java.util.Date d1 = sdf.parse(dataByType.get(dataType));
+        java.sql.Date d2 = new java.sql.Date(d1.getTime());
+        row.addDate("c0", d2);
+        break;
+      // Type.STRING.
+      case STRING:
+        row.addString("c0", dataByType.get(dataType));
+        break;
+      // Type.BINARY.
+ case BINARY: + row.addBinary("c0", dataByType.get(dataType).getBytes("UTF-8")); + break; + default: + return; + } + row.addInt("c1", anotherColData); + session.apply(update); + session.flush(); + } + + private Insert createInsert(int key) { + return createBasicSchemaInsert(table, key); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java new file mode 100644 index 0000000000..2624cc3915 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java @@ -0,0 +1,221 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.kudu.test.ClientTestUtil.getAllTypesCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getSchemaWithAllTypes; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.sql.Timestamp; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.util.DateUtil; + +public class TestRowResult { + + // Generate a unique table name + private static final String TABLE_NAME = + TestRowResult.class.getName() + "-" + System.currentTimeMillis(); + + private static final Schema allTypesSchema = getSchemaWithAllTypes(); + + // insert 5 rows to test result iterations + private static final int TEST_ROWS = 5; + + private KuduTable table; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() throws Exception { + harness.getClient().createTable(TABLE_NAME, allTypesSchema, getAllTypesCreateTableOptions()); + table = harness.getClient().openTable(TABLE_NAME); + + KuduClient client = harness.getClient(); + KuduSession session = client.newSession(); + + for (int i = 0; i < TEST_ROWS; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + + row.addByte(0, (byte) i); + row.addShort(1, (short) 2); + row.addInt(2, 3); + row.addLong(3, 4L); + row.addBoolean(4, true); + row.addFloat(5, 5.6f); + row.addDouble(6, 7.8); + row.addString(7, "string-value"); + row.addBinary(8, "binary-array".getBytes(UTF_8)); + ByteBuffer bb = ByteBuffer.wrap("binary-bytebuffer".getBytes(UTF_8)); + bb.position(7); // We're only inserting the bytebuffer part of the original array. 
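+      // position(7) skips the "binary-" prefix, so only the trailing "bytebuffer" bytes are
+      // written to column 9; the read-side assertions below compare against exactly those bytes.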
+ row.addBinary(9, bb); + row.setNull(10); + row.addTimestamp(11, new Timestamp(11)); + row.addDecimal(12, BigDecimal.valueOf(12345, 3)); + row.addVarchar(13, "varcharval"); + row.addDate(14, DateUtil.epochDaysToSqlDate(0)); + + session.apply(insert); + } + } + + @Test(timeout = 10000) + public void testRowwiseRowset() throws Exception { + KuduClient client = harness.getClient(); + KuduScanner scanner = client.newScannerBuilder(table).build(); + checkRows(scanner); + } + + @Test(timeout = 10000) + public void testRowwiseRowsetReuse() throws Exception { + KuduClient client = harness.getClient(); + KuduScanner scanner = client.newScannerBuilder(table).build(); + scanner.setReuseRowResult(true); + checkRows(scanner); + } + + @Test(timeout = 10000) + public void testColumnarRowset() throws Exception { + KuduClient client = harness.getClient(); + KuduScanner scanner = client.newScannerBuilder(table).build(); + scanner.setRowDataFormat(AsyncKuduScanner.RowDataFormat.COLUMNAR); + checkRows(scanner); + } + + @Test(timeout = 10000) + public void testColumnarRowsetReuse() throws Exception { + KuduClient client = harness.getClient(); + KuduScanner scanner = client.newScannerBuilder(table).build(); + scanner.setRowDataFormat(AsyncKuduScanner.RowDataFormat.COLUMNAR); + scanner.setReuseRowResult(true); + checkRows(scanner); + } + + private void checkRows(KuduScanner scanner) throws KuduException { + while (scanner.hasMoreRows()) { + RowResultIterator it = scanner.nextRows(); + assertEquals(TEST_ROWS, it.getNumRows()); + for (int i = 0; i < TEST_ROWS; i++) { + assertTrue(it.hasNext()); + RowResult rr = it.next(); + + assertEquals((byte) i, rr.getByte(0)); + assertEquals((byte) i, rr.getObject(0)); + assertEquals((byte) i, rr.getByte(allTypesSchema.getColumnByIndex(0).getName())); + + assertEquals((short) 2, rr.getShort(1)); + assertEquals((short) 2, rr.getObject(1)); + assertEquals((short) 2, rr.getShort(allTypesSchema.getColumnByIndex(1).getName())); + + assertEquals(3, rr.getInt(2)); + assertEquals(3, rr.getObject(2)); + assertEquals(3, rr.getInt(allTypesSchema.getColumnByIndex(2).getName())); + + assertEquals((long) 4, rr.getLong(3)); + assertEquals((long) 4, rr.getObject(3)); + assertEquals((long) 4, rr.getLong(allTypesSchema.getColumnByIndex(3).getName())); + + assertEquals(true, rr.getBoolean(4)); + assertEquals(true, rr.getObject(4)); + assertEquals(true, rr.getBoolean(allTypesSchema.getColumnByIndex(4).getName())); + + assertEquals(5.6f, rr.getFloat(5), .001f); + assertEquals(5.6f, (float) rr.getObject(5), .001f); + assertEquals(5.6f, + rr.getFloat(allTypesSchema.getColumnByIndex(5).getName()), .001f); + + assertEquals(7.8, rr.getDouble(6), .001); + assertEquals(7.8, (double) rr.getObject(6), .001); + assertEquals(7.8, + rr.getDouble(allTypesSchema.getColumnByIndex(6).getName()), .001f); + + assertEquals("string-value", rr.getString(7)); + assertEquals("string-value", rr.getObject(7)); + assertEquals("string-value", + rr.getString(allTypesSchema.getColumnByIndex(7).getName())); + + assertArrayEquals("binary-array".getBytes(UTF_8), rr.getBinaryCopy(8)); + assertArrayEquals("binary-array".getBytes(UTF_8), (byte[]) rr.getObject(8)); + assertArrayEquals("binary-array".getBytes(UTF_8), + rr.getBinaryCopy(allTypesSchema.getColumnByIndex(8).getName())); + + ByteBuffer buffer = rr.getBinary(8); + assertEquals(buffer, rr.getBinary(allTypesSchema.getColumnByIndex(8).getName())); + byte[] binaryValue = new byte[buffer.remaining()]; + buffer.get(binaryValue); + 
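+        // Drain the ByteBuffer returned by getBinary() into a byte[] so its contents can be
+        // compared with assertArrayEquals below.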
assertArrayEquals("binary-array".getBytes(UTF_8), binaryValue); + + assertArrayEquals("bytebuffer".getBytes(UTF_8), rr.getBinaryCopy(9)); + + assertEquals(true, rr.isNull(10)); + assertNull(rr.getObject(10)); + assertEquals(true, rr.isNull(allTypesSchema.getColumnByIndex(10).getName())); + + assertEquals(new Timestamp(11), rr.getTimestamp(11)); + assertEquals(new Timestamp(11), rr.getObject(11)); + assertEquals(new Timestamp(11), + rr.getTimestamp(allTypesSchema.getColumnByIndex(11).getName())); + + assertEquals(BigDecimal.valueOf(12345, 3), rr.getDecimal(12)); + assertEquals(BigDecimal.valueOf(12345, 3), rr.getObject(12)); + assertEquals(BigDecimal.valueOf(12345, 3), + rr.getDecimal(allTypesSchema.getColumnByIndex(12).getName())); + + assertEquals("varcharval", rr.getVarchar(13)); + assertEquals("varcharval", rr.getObject(13)); + assertEquals("varcharval", + rr.getVarchar(allTypesSchema.getColumnByIndex(13).getName())); + + assertEquals(DateUtil.epochDaysToSqlDate(0), rr.getDate(14)); + assertEquals(DateUtil.epochDaysToSqlDate(0), rr.getObject(14)); + assertEquals(DateUtil.epochDaysToSqlDate(0), + rr.getDate(allTypesSchema.getColumnByIndex(14).getName())); + + // We test with the column name once since it's the same method for all types, unlike above. + assertEquals(Type.INT8, rr.getColumnType(allTypesSchema.getColumnByIndex(0).getName())); + assertEquals(Type.INT8, rr.getColumnType(0)); + assertEquals(Type.INT16, rr.getColumnType(1)); + assertEquals(Type.INT32, rr.getColumnType(2)); + assertEquals(Type.INT64, rr.getColumnType(3)); + assertEquals(Type.BOOL, rr.getColumnType(4)); + assertEquals(Type.FLOAT, rr.getColumnType(5)); + assertEquals(Type.DOUBLE, rr.getColumnType(6)); + assertEquals(Type.STRING, rr.getColumnType(7)); + assertEquals(Type.BINARY, rr.getColumnType(8)); + assertEquals(Type.UNIXTIME_MICROS, rr.getColumnType(11)); + assertEquals(Type.DECIMAL, rr.getColumnType(12)); + assertEquals(Type.VARCHAR, rr.getColumnType(13)); + assertEquals(Type.DATE, rr.getColumnType(14)); + } + } + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRpcTraces.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRpcTraces.java new file mode 100644 index 0000000000..4546bf6daf --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestRpcTraces.java @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.List; + +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.client.RpcTraceFrame.Action; +import org.apache.kudu.client.RpcTraceFrame.RpcTraceFrameBuilder; +import org.apache.kudu.test.junit.RetryRule; + +public class TestRpcTraces { + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Test + public void testLimit() { + PingRequest ping = PingRequest.makeMasterPingRequest(); + + ping.addTrace(getTrace()); + assertNotTruncated(ping); + + for (int i = 0; i < KuduRpc.MAX_TRACES_SIZE - 2; i++) { + ping.addTrace(getTrace()); + } + assertNotTruncated(ping); + + ping.addTrace(getTrace()); + assertNotTruncated(ping); + + ping.addTrace(getTrace()); + assertTruncateIsLast(ping); + + ping.addTrace(getTrace()); + assertTruncateIsLast(ping); + } + + @Test + public void testParentRpc() { + PingRequest parent = PingRequest.makeMasterPingRequest(); + + PingRequest daughter = PingRequest.makeMasterPingRequest(); + PingRequest son = PingRequest.makeMasterPingRequest(); + + PingRequest sonsDaughter = PingRequest.makeMasterPingRequest(); + + sonsDaughter.setParentRpc(son); + son.setParentRpc(parent); + daughter.setParentRpc(parent); + + // Son's daughter => son => parent. + RpcTraceFrame trace = getTrace(); + sonsDaughter.addTrace(trace); + assertSame(trace, son.getImmutableTraces().get(0)); + assertTrue(parent.getImmutableTraces().get(0) == trace); + assertTrue(daughter.getImmutableTraces().isEmpty()); + + // Son => parent. + trace = getTrace(); + son.addTrace(trace); + assertSame(trace, son.getImmutableTraces().get(1)); + assertSame(trace, parent.getImmutableTraces().get(1)); + assertTrue(daughter.getImmutableTraces().isEmpty()); + assertEquals(1, sonsDaughter.getImmutableTraces().size()); + + // Daughter => parent. + trace = getTrace(); + daughter.addTrace(trace); + assertSame(trace, daughter.getImmutableTraces().get(0)); + assertSame(trace, parent.getImmutableTraces().get(2)); + assertEquals(2, son.getImmutableTraces().size()); + assertEquals(1, sonsDaughter.getImmutableTraces().size()); + + // Parent alone. + trace = getTrace(); + parent.addTrace(trace); + assertSame(trace, parent.getImmutableTraces().get(3)); + assertEquals(1, daughter.getImmutableTraces().size()); + assertEquals(2, son.getImmutableTraces().size()); + assertEquals(1, sonsDaughter.getImmutableTraces().size()); + } + + @Test + public void testTraceSummary() throws Exception { + List traces = new ArrayList<>(); + String emptySummary = RpcTraceFrame.getHumanReadableSummaryStringForTraces(traces); + assertEquals("No traces", emptySummary); + + // Test a minimal frame with no server info or status. 
+ traces.add(new RpcTraceFrameBuilder("GetTableLocations", Action.QUERY_MASTER) + .build()); + String summary1 = RpcTraceFrame.getHumanReadableSummaryStringForTraces(traces); + assertTrue(summary1.contains("Sent(0)")); + assertTrue(summary1.contains("Received(0)")); + assertTrue(summary1.contains("Delayed(0)")); + assertTrue(summary1.contains("MasterRefresh(1)")); + assertTrue(summary1.contains("AuthRefresh(0)")); + assertTrue(summary1.contains("Truncated: false")); + assertFalse(summary1.contains("Sent:")); + assertFalse(summary1.contains("Received:")); + assertFalse(summary1.contains("Delayed:")); + + // Fake server info for building traces. + ServerInfo serverInfo = new ServerInfo( + "fake-uuid", + new HostAndPort("test.com", 12345), + InetAddress.getByName("10.1.2.3"), + /*location=*/""); + + // Test a few sent and received messages. + traces.add(new RpcTraceFrameBuilder("Batch", Action.SEND_TO_SERVER) + .build()); + traces.add(new RpcTraceFrameBuilder("Batch", Action.SEND_TO_SERVER) + .serverInfo(serverInfo) + .build()); + traces.add(new RpcTraceFrameBuilder("Batch", Action.RECEIVE_FROM_SERVER) + .build()); + traces.add(new RpcTraceFrameBuilder("Batch", Action.RECEIVE_FROM_SERVER) + .serverInfo(serverInfo) + .callStatus(Status.OK()) + .build()); + String summary2 = RpcTraceFrame.getHumanReadableSummaryStringForTraces(traces); + assertTrue(summary2.contains("Sent(2)")); + assertTrue(summary2.contains("Received(2)")); + assertTrue(summary2.contains("Delayed(0)")); + assertTrue(summary2.contains("MasterRefresh(1)")); + assertTrue(summary2.contains("AuthRefresh(0)")); + assertTrue(summary2.contains("Truncated: false")); + assertTrue(summary2.contains("Sent: (UNKNOWN, [ Batch, 1 ]), (fake-uuid, [ Batch, 1 ])")); + assertTrue(summary2.contains("Received: (UNKNOWN, [ UNKNOWN, 1 ]), (fake-uuid, [ OK, 1 ])")); + assertFalse(summary2.contains("Delayed:")); + + // Test delayed messages including auth wait. + traces.add(new RpcTraceFrameBuilder("Batch", Action.SLEEP_THEN_RETRY) + .serverInfo(serverInfo) + .build()); + traces.add(new RpcTraceFrameBuilder("Batch", + Action.GET_NEW_AUTHENTICATION_TOKEN_THEN_RETRY) + .serverInfo(serverInfo) + .build()); + String summary3 = RpcTraceFrame.getHumanReadableSummaryStringForTraces(traces); + assertTrue(summary3.contains("Sent(2)")); + assertTrue(summary3.contains("Received(2)")); + assertTrue(summary3.contains("Delayed(1)")); + assertTrue(summary3.contains("MasterRefresh(1)")); + assertTrue(summary3.contains("AuthRefresh(1)")); + assertTrue(summary3.contains("Truncated: false")); + assertFalse(summary2.contains("Delayed: (fake-uuid, [ Batch, 1 ])")); + + // Test truncation. + traces.add(new RpcTraceFrameBuilder("Batch", Action.TRACE_TRUNCATED) + .build()); + String summary4 = RpcTraceFrame.getHumanReadableSummaryStringForTraces(traces); + assertTrue(summary4.contains("Truncated: true")); + } + + private RpcTraceFrame getTrace() { + return new RpcTraceFrameBuilder( + "trace", + Action.QUERY_MASTER) // Just a random action. 
+ .build(); + } + + private void assertNotTruncated(KuduRpc rpc) { + for (RpcTraceFrame trace : rpc.getImmutableTraces()) { + assertNotEquals(Action.TRACE_TRUNCATED, trace.getAction()); + } + } + + private void assertTruncateIsLast(KuduRpc rpc) { + List traces = rpc.getImmutableTraces(); + assertEquals(KuduRpc.MAX_TRACES_SIZE + 1, traces.size()); + for (int i = 0; i < traces.size() - 1; i++) { + assertNotEquals(Action.TRACE_TRUNCATED, traces.get(i).getAction()); + } + assertEquals(Action.TRACE_TRUNCATED, traces.get(traces.size() - 1).getAction()); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScanPredicate.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScanPredicate.java new file mode 100644 index 0000000000..8213b39c71 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScanPredicate.java @@ -0,0 +1,778 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.List; +import java.util.NavigableSet; +import java.util.TreeSet; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSortedSet; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.KuduPredicate.ComparisonOp; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.util.CharUtil; +import org.apache.kudu.util.DecimalUtil; + +public class TestScanPredicate { + + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + private Schema createTableSchema(Type type) { + ColumnSchema key = new ColumnSchema.ColumnSchemaBuilder("key", Type.INT64).key(true).build(); + ColumnSchema val; + switch (type) { + case VARCHAR: + val = new ColumnSchema.ColumnSchemaBuilder("value", type) + .typeAttributes(CharUtil.typeAttributes(10)).nullable(true).build(); + break; + default: + val = new ColumnSchema.ColumnSchemaBuilder("value", type).nullable(true).build(); + break; + } + return new Schema(ImmutableList.of(key, val)); + } + + private static CreateTableOptions createTableOptions() { + return new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("key")); + } + + private int countRows(KuduTable table, KuduPredicate... 
predicates) throws Exception {
+    KuduScanner.KuduScannerBuilder scanBuilder =
+        new KuduScanner.KuduScannerBuilder(asyncClient, table);
+    for (KuduPredicate predicate : predicates) {
+      scanBuilder.addPredicate(predicate);
+    }
+
+    KuduScanner scanner = scanBuilder.build();
+
+    int count = 0;
+    while (scanner.hasMoreRows()) {
+      count += scanner.nextRows().getNumRows();
+    }
+    return count;
+  }
+
+  private NavigableSet<Long> createIntegerValues(Type type) {
+    NavigableSet<Long> values = new TreeSet<>();
+    for (long i = -50; i < 50; i++) {
+      values.add(i);
+    }
+    values.add(KuduPredicate.minIntValue(type));
+    values.add(KuduPredicate.minIntValue(type) + 1);
+    values.add(KuduPredicate.maxIntValue(type) - 1);
+    values.add(KuduPredicate.maxIntValue(type));
+    return values;
+  }
+
+  private List<Long> createIntegerTestValues(Type type) {
+    return ImmutableList.of(
+        KuduPredicate.minIntValue(type),
+        KuduPredicate.minIntValue(type) + 1,
+        -51L,
+        -50L,
+        0L,
+        49L,
+        50L,
+        KuduPredicate.maxIntValue(type) - 1,
+        KuduPredicate.maxIntValue(type));
+  }
+
+  private NavigableSet<Float> createFloatValues() {
+    NavigableSet<Float> values = new TreeSet<>();
+    for (long i = -50; i < 50; i++) {
+      values.add((float) i + (float) i / 100.0F);
+    }
+
+    values.add(Float.NEGATIVE_INFINITY);
+    values.add(-Float.MAX_VALUE);
+    values.add(-Float.MIN_NORMAL);
+    values.add(-Float.MIN_VALUE);
+    values.add(Float.MIN_VALUE);
+    values.add(Float.MIN_NORMAL);
+    values.add(Float.MAX_VALUE);
+    values.add(Float.POSITIVE_INFINITY);
+
+    // TODO: uncomment after fixing KUDU-1386
+    // values.add(Float.NaN);
+    return values;
+  }
+
+  private List<Float> createFloatTestValues() {
+    return ImmutableList.of(
+        Float.NEGATIVE_INFINITY,
+        -Float.MAX_VALUE,
+        -100.0F,
+        -1.1F,
+        -1.0F,
+        -Float.MIN_NORMAL,
+        -Float.MIN_VALUE,
+        0.0F,
+        Float.MIN_VALUE,
+        Float.MIN_NORMAL,
+        1.0F,
+        1.1F,
+        100.0F,
+        Float.MAX_VALUE,
+        Float.POSITIVE_INFINITY
+        // TODO: uncomment after fixing KUDU-1386
+        // Float.NaN
+    );
+  }
+
+  private NavigableSet<Double> createDoubleValues() {
+    NavigableSet<Double> values = new TreeSet<>();
+    for (long i = -50; i < 50; i++) {
+      values.add((double) i + (double) i / 100.0);
+    }
+
+    values.add(Double.NEGATIVE_INFINITY);
+    values.add(-Double.MAX_VALUE);
+    values.add(-Double.MIN_NORMAL);
+    values.add(-Double.MIN_VALUE);
+    values.add(Double.MIN_VALUE);
+    values.add(Double.MIN_NORMAL);
+    values.add(Double.MAX_VALUE);
+    values.add(Double.POSITIVE_INFINITY);
+
+    // TODO: uncomment after fixing KUDU-1386
+    // values.add(Double.NaN);
+    return values;
+  }
+
+  private List<Double> createDoubleTestValues() {
+    return ImmutableList.of(
+        Double.NEGATIVE_INFINITY,
+        -Double.MAX_VALUE,
+        -100.0,
+        -1.1,
+        -1.0,
+        -Double.MIN_NORMAL,
+        -Double.MIN_VALUE,
+        0.0,
+        Double.MIN_VALUE,
+        Double.MIN_NORMAL,
+        1.0,
+        1.1,
+        100.0,
+        Double.MAX_VALUE,
+        Double.POSITIVE_INFINITY
+
+        // TODO: uncomment after fixing KUDU-1386
+        // Double.NaN
+    );
+  }
+
+  // Returns a vector of decimal(4, 2) numbers from -50.50 (inclusive) to 50.50
+  // (exclusive) (100 values) and boundary values.
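+  // (For scale 2, i * 100 + i encodes values such as -50.50, -49.49, ..., 49.49.)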
+ private NavigableSet createDecimalValues() { + NavigableSet values = new TreeSet<>(); + for (long i = -50; i < 50; i++) { + values.add(BigDecimal.valueOf(i * 100 + i, 2)); + } + + values.add(BigDecimal.valueOf(-9999, 2)); + values.add(BigDecimal.valueOf(-9998, 2)); + values.add(BigDecimal.valueOf(9998, 2)); + values.add(BigDecimal.valueOf(9999, 2)); + + return values; + } + + private List createDecimalTestValues() { + return ImmutableList.of( + BigDecimal.valueOf(-9999, 2), + BigDecimal.valueOf(-9998, 2), + BigDecimal.valueOf(5100, 2), + BigDecimal.valueOf(-5000, 2), + BigDecimal.valueOf(0, 2), + BigDecimal.valueOf(4900, 2), + BigDecimal.valueOf(5000, 2), + BigDecimal.valueOf(9998, 2), + BigDecimal.valueOf(9999, 2) + ); + } + + private NavigableSet createStringValues() { + return ImmutableSortedSet.of("", "\0", "\0\0", "a", "a\0", "a\0a", "aa\0"); + } + + private List createStringTestValues() { + List values = new ArrayList<>(createStringValues()); + values.add("aa"); + values.add("\1"); + values.add("a\1"); + return values; + } + + private void checkIntPredicates(KuduTable table, + NavigableSet values, + List testValues) throws Exception { + ColumnSchema col = table.getSchema().getColumn("value"); + Assert.assertEquals(values.size() + 1, countRows(table)); + for (long v : testValues) { + // value = v + KuduPredicate equal = KuduPredicate.newComparisonPredicate(col, ComparisonOp.EQUAL, v); + Assert.assertEquals(values.contains(v) ? 1 : 0, countRows(table, equal)); + + // value >= v + KuduPredicate greaterEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER_EQUAL, v); + Assert.assertEquals(values.tailSet(v).size(), countRows(table, greaterEqual)); + + // value <= v + KuduPredicate lessEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS_EQUAL, v); + Assert.assertEquals(values.headSet(v, true).size(), countRows(table, lessEqual)); + + // value > v + KuduPredicate greater = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER, v); + Assert.assertEquals(values.tailSet(v, false).size(), countRows(table, greater)); + + // value < v + KuduPredicate less = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS, v); + Assert.assertEquals(values.headSet(v).size(), countRows(table, less)); + } + + KuduPredicate isNotNull = KuduPredicate.newIsNotNullPredicate(col); + Assert.assertEquals(values.size(), countRows(table, isNotNull)); + + KuduPredicate isNull = KuduPredicate.newIsNullPredicate(col); + Assert.assertEquals(1, countRows(table, isNull)); + } + + @Test + public void testBoolPredicates() throws Exception { + Schema schema = createTableSchema(Type.BOOL); + client.createTable("bool-table", schema, createTableOptions()); + KuduTable table = client.openTable("bool-table"); + + NavigableSet values = ImmutableSortedSet.of(false, true); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (boolean value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addBoolean("value", value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + ColumnSchema col = table.getSchema().getColumn("value"); + Assert.assertEquals(values.size() + 1, countRows(table)); + + for (boolean v : values) { + // value = v + KuduPredicate equal = KuduPredicate.newComparisonPredicate(col, 
ComparisonOp.EQUAL, v); + Assert.assertEquals(values.contains(v) ? 1 : 0, countRows(table, equal)); + + // value >= v + KuduPredicate greaterEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER_EQUAL, v); + Assert.assertEquals(values.tailSet(v).size(), countRows(table, greaterEqual)); + + // value <= v + KuduPredicate lessEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS_EQUAL, v); + Assert.assertEquals(values.headSet(v, true).size(), countRows(table, lessEqual)); + + // value > v + KuduPredicate greater = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER, v); + Assert.assertEquals(values.tailSet(v, false).size(), countRows(table, greater)); + + // value < v + KuduPredicate less = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS, v); + Assert.assertEquals(values.headSet(v).size(), countRows(table, less)); + } + } + + @Test + public void testBytePredicates() throws Exception { + Schema schema = createTableSchema(Type.INT8); + client.createTable("byte-table", schema, createTableOptions()); + KuduTable table = client.openTable("byte-table"); + + NavigableSet values = createIntegerValues(Type.INT8); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (long value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addByte("value", (byte) value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + checkIntPredicates(table, values, createIntegerTestValues(Type.INT8)); + } + + @Test + public void testShortPredicates() throws Exception { + Schema schema = createTableSchema(Type.INT16); + client.createTable("short-table", schema, + new CreateTableOptions().setRangePartitionColumns( + ImmutableList.of())); + KuduTable table = client.openTable("short-table"); + + NavigableSet values = createIntegerValues(Type.INT16); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (long value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addShort("value", (short) value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + checkIntPredicates(table, values, createIntegerTestValues(Type.INT16)); + } + + @Test + public void testIntPredicates() throws Exception { + Schema schema = createTableSchema(Type.INT32); + client.createTable("int-table", schema, createTableOptions()); + KuduTable table = client.openTable("int-table"); + + NavigableSet values = createIntegerValues(Type.INT32); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (long value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addInt("value", (int) value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + checkIntPredicates(table, values, createIntegerTestValues(Type.INT32)); + } + + @Test + public void testLongPredicates() throws 
Exception { + Schema schema = createTableSchema(Type.INT64); + client.createTable("long-table", schema, + new CreateTableOptions().setRangePartitionColumns( + ImmutableList.of())); + KuduTable table = client.openTable("long-table"); + + NavigableSet values = createIntegerValues(Type.INT64); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (long value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addLong("value", value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + checkIntPredicates(table, values, createIntegerTestValues(Type.INT64)); + } + + @Test + public void testTimestampPredicate() throws Exception { + Schema schema = createTableSchema(Type.INT64); + client.createTable("timestamp-table", schema, createTableOptions()); + KuduTable table = client.openTable("timestamp-table"); + + NavigableSet values = createIntegerValues(Type.INT64); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (long value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addLong("value", value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + checkIntPredicates(table, values, createIntegerTestValues(Type.INT64)); + } + + @Test + public void testFloatPredicates() throws Exception { + Schema schema = createTableSchema(Type.FLOAT); + client.createTable("float-table", schema, createTableOptions()); + KuduTable table = client.openTable("float-table"); + + NavigableSet values = createFloatValues(); + final List testValues = createFloatTestValues(); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (float value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addFloat("value", value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + ColumnSchema col = table.getSchema().getColumn("value"); + Assert.assertEquals(values.size() + 1, countRows(table)); + + for (float v : testValues) { + // value = v + KuduPredicate equal = KuduPredicate.newComparisonPredicate(col, ComparisonOp.EQUAL, v); + Assert.assertEquals(values.subSet(v, true, v, true).size(), countRows(table, equal)); + + // value >= v + KuduPredicate greaterEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER_EQUAL, v); + Assert.assertEquals(values.tailSet(v).size(), countRows(table, greaterEqual)); + + // value <= v + KuduPredicate lessEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS_EQUAL, v); + Assert.assertEquals(values.headSet(v, true).size(), countRows(table, lessEqual)); + + // value > v + KuduPredicate greater = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER, v); + Assert.assertEquals(values.tailSet(v, false).size(), countRows(table, greater)); + + // value < v + KuduPredicate less = + KuduPredicate.newComparisonPredicate(col, 
ComparisonOp.LESS, v); + Assert.assertEquals(values.headSet(v).size(), countRows(table, less)); + } + + KuduPredicate isNotNull = KuduPredicate.newIsNotNullPredicate(col); + Assert.assertEquals(values.size(), countRows(table, isNotNull)); + + KuduPredicate isNull = KuduPredicate.newIsNullPredicate(col); + Assert.assertEquals(1, countRows(table, isNull)); + } + + @Test + public void testDoublePredicates() throws Exception { + Schema schema = createTableSchema(Type.DOUBLE); + client.createTable("double-table", schema, createTableOptions()); + KuduTable table = client.openTable("double-table"); + + NavigableSet values = createDoubleValues(); + final List testValues = createDoubleTestValues(); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (double value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addDouble("value", value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + ColumnSchema col = table.getSchema().getColumn("value"); + Assert.assertEquals(values.size() + 1, countRows(table)); + + for (double v : testValues) { + // value = v + KuduPredicate equal = KuduPredicate.newComparisonPredicate(col, ComparisonOp.EQUAL, v); + Assert.assertEquals(values.subSet(v, true, v, true).size(), countRows(table, equal)); + + // value >= v + KuduPredicate greaterEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER_EQUAL, v); + Assert.assertEquals(values.tailSet(v).size(), countRows(table, greaterEqual)); + + // value <= v + KuduPredicate lessEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS_EQUAL, v); + Assert.assertEquals(values.headSet(v, true).size(), countRows(table, lessEqual)); + + // value > v + KuduPredicate greater = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER, v); + Assert.assertEquals(values.tailSet(v, false).size(), countRows(table, greater)); + + // value < v + KuduPredicate less = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS, v); + Assert.assertEquals(values.headSet(v).size(), countRows(table, less)); + } + + KuduPredicate isNotNull = KuduPredicate.newIsNotNullPredicate(col); + Assert.assertEquals(values.size(), countRows(table, isNotNull)); + + KuduPredicate isNull = KuduPredicate.newIsNullPredicate(col); + Assert.assertEquals(1, countRows(table, isNull)); + } + + @Test + public void testDecimalPredicates() throws Exception { + ColumnSchema key = new ColumnSchema.ColumnSchemaBuilder("key", Type.INT64).key(true).build(); + ColumnSchema val = new ColumnSchema.ColumnSchemaBuilder("value", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(4, 2)).nullable(true).build(); + Schema schema = new Schema(ImmutableList.of(key, val)); + + client.createTable("decimal-table", schema, createTableOptions()); + KuduTable table = client.openTable("decimal-table"); + + NavigableSet values = createDecimalValues(); + final List testValues = createDecimalTestValues(); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (BigDecimal value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addDecimal("value", value); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + 
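+    // Add one extra row whose value column is left NULL, so the IS NULL predicate
+    // checked at the end of this test matches exactly one row.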
nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + ColumnSchema col = table.getSchema().getColumn("value"); + Assert.assertEquals(values.size() + 1, countRows(table)); + + for (BigDecimal v : testValues) { + // value = v + KuduPredicate equal = KuduPredicate.newComparisonPredicate(col, ComparisonOp.EQUAL, v); + Assert.assertEquals(values.subSet(v, true, v, true).size(), countRows(table, equal)); + + // value >= v + KuduPredicate greaterEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER_EQUAL, v); + Assert.assertEquals(values.tailSet(v).size(), countRows(table, greaterEqual)); + + // value <= v + KuduPredicate lessEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS_EQUAL, v); + Assert.assertEquals(values.headSet(v, true).size(), countRows(table, lessEqual)); + + // value > v + KuduPredicate greater = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER, v); + Assert.assertEquals(values.tailSet(v, false).size(), countRows(table, greater)); + + // value < v + KuduPredicate less = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS, v); + Assert.assertEquals(values.headSet(v).size(), countRows(table, less)); + } + + KuduPredicate isNotNull = KuduPredicate.newIsNotNullPredicate(col); + Assert.assertEquals(values.size(), countRows(table, isNotNull)); + + KuduPredicate isNull = KuduPredicate.newIsNullPredicate(col); + Assert.assertEquals(1, countRows(table, isNull)); + } + + @Test + public void testStringPredicates() throws Exception { + testVarlengthPredicates(Type.STRING); + } + + @Test + public void testVarcharPredicates() throws Exception { + testVarlengthPredicates(Type.VARCHAR); + } + + private void testVarlengthPredicates(Type type) throws Exception { + Schema schema = createTableSchema(type); + client.createTable("string-table", schema, createTableOptions()); + KuduTable table = client.openTable("string-table"); + + NavigableSet values = createStringValues(); + final List testValues = createStringTestValues(); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (String value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + switch (type) { + case VARCHAR: + insert.getRow().addVarchar("value", value); + break; + case STRING: + insert.getRow().addString("value", value); + break; + default: + throw new IllegalArgumentException("CHAR/VARCHAR/STRING expected"); + } + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + ColumnSchema col = table.getSchema().getColumn("value"); + Assert.assertEquals(values.size() + 1, countRows(table)); + + for (String v : testValues) { + // value = v + KuduPredicate equal = KuduPredicate.newComparisonPredicate(col, ComparisonOp.EQUAL, v); + Assert.assertEquals(values.subSet(v, true, v, true).size(), countRows(table, equal)); + + // value >= v + KuduPredicate greaterEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER_EQUAL, v); + Assert.assertEquals(values.tailSet(v).size(), countRows(table, greaterEqual)); + + // value <= v + KuduPredicate lessEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS_EQUAL, v); + Assert.assertEquals(values.headSet(v, true).size(), countRows(table, lessEqual)); + + // value > v + KuduPredicate 
greater = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER, v); + Assert.assertEquals(values.tailSet(v, false).size(), countRows(table, greater)); + + // value < v + KuduPredicate less = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS, v); + Assert.assertEquals(values.headSet(v).size(), countRows(table, less)); + } + + KuduPredicate isNotNull = KuduPredicate.newIsNotNullPredicate(col); + Assert.assertEquals(values.size(), countRows(table, isNotNull)); + + KuduPredicate isNull = KuduPredicate.newIsNullPredicate(col); + Assert.assertEquals(1, countRows(table, isNull)); + } + + @Test + public void testBinaryPredicates() throws Exception { + Schema schema = createTableSchema(Type.BINARY); + client.createTable("binary-table", schema, createTableOptions()); + KuduTable table = client.openTable("binary-table"); + + NavigableSet values = createStringValues(); + final List testValues = createStringTestValues(); + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + long i = 0; + for (String value : values) { + Insert insert = table.newInsert(); + insert.getRow().addLong("key", i++); + insert.getRow().addBinary("value", Bytes.fromString(value)); + session.apply(insert); + } + Insert nullInsert = table.newInsert(); + nullInsert.getRow().addLong("key", i); + nullInsert.getRow().setNull("value"); + session.apply(nullInsert); + session.flush(); + + ColumnSchema col = table.getSchema().getColumn("value"); + Assert.assertEquals(values.size() + 1, countRows(table)); + + for (String s : testValues) { + byte[] v = Bytes.fromString(s); + // value = v + KuduPredicate equal = KuduPredicate.newComparisonPredicate(col, ComparisonOp.EQUAL, v); + Assert.assertEquals(values.subSet(s, true, s, true).size(), countRows(table, equal)); + + // value >= v + KuduPredicate greaterEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER_EQUAL, v); + Assert.assertEquals(values.tailSet(s).size(), countRows(table, greaterEqual)); + + // value <= v + KuduPredicate lessEqual = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS_EQUAL, v); + Assert.assertEquals(values.headSet(s, true).size(), countRows(table, lessEqual)); + + // value > v + KuduPredicate greater = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER, v); + Assert.assertEquals(values.tailSet(s, false).size(), countRows(table, greater)); + + // value < v + KuduPredicate less = + KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS, v); + Assert.assertEquals(values.headSet(s).size(), countRows(table, less)); + } + + KuduPredicate isNotNull = KuduPredicate.newIsNotNullPredicate(col); + Assert.assertEquals(values.size(), countRows(table, isNotNull)); + + KuduPredicate isNull = KuduPredicate.newIsNullPredicate(col); + Assert.assertEquals(1, countRows(table, isNull)); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java new file mode 100644 index 0000000000..22aedbd6c2 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java @@ -0,0 +1,943 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.ClientTestUtil.countScanTokenRows; +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.createDefaultTable; +import static org.apache.kudu.test.ClientTestUtil.createManyStringsSchema; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.ClientTestUtil.loadDefaultTable; +import static org.apache.kudu.test.MetricTestUtils.totalRequestCount; +import static org.apache.kudu.test.MetricTestUtils.validateRequestCount; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.protobuf.CodedInputStream; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.cluster.KuduBinaryLocator; +import org.apache.kudu.test.cluster.MiniKuduCluster; + +public class TestScanToken { + private static final Logger LOG = LoggerFactory.getLogger(TestKuduClient.class); + + private static final String testTableName = "TestScanToken"; + + private KuduClient client; + private AsyncKuduClient asyncClient; + + // Enable Kerberos and access control so we can validate the requests in secure environment. + // Specifically that authz tokens in the scan tokens work. + private static final MiniKuduCluster.MiniKuduClusterBuilder clusterBuilder = + KuduTestHarness.getBaseClusterBuilder() + .enableKerberos() + .addTabletServerFlag("--tserver_enforce_access_control=true"); + + @Rule + public KuduTestHarness harness = new KuduTestHarness(clusterBuilder); + + @Before + public void setUp() { + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + /** + * Tests scan tokens by creating a set of scan tokens, serializing them, and + * then executing them in parallel with separate client instances. This + * simulates the normal usecase of scan tokens being created at a central + * planner and distributed to remote task executors. 
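+ *
+ * A minimal sketch of that flow (here {@code plannerClient} and {@code workerClient}
+ * are illustrative stand-ins for two separately constructed clients, not names used
+ * in this test):
+ * {@code
+ *   List<KuduScanToken> tokens = plannerClient.newScanTokenBuilder(table).build();
+ *   for (KuduScanToken token : tokens) {
+ *     byte[] bytes = token.serialize();  // ship these bytes to a worker
+ *     KuduScanner scanner = KuduScanToken.deserializeIntoScanner(bytes, workerClient);
+ *     while (scanner.hasMoreRows()) {
+ *       scanner.nextRows();  // consume rows on the worker
+ *     }
+ *   }
+ * }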
+ */ + @Test + public void testScanTokens() throws Exception { + int saveFetchTablets = AsyncKuduClient.FETCH_TABLETS_PER_RANGE_LOOKUP; + try { + // For this test, make sure that we cover the case that not all tablets + // are returned in a single batch. + AsyncKuduClient.FETCH_TABLETS_PER_RANGE_LOOKUP = 4; + + Schema schema = createManyStringsSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.addHashPartitions(ImmutableList.of("key"), 8); + + PartialRow splitRow = schema.newPartialRow(); + splitRow.addString("key", "key_50"); + createOptions.addSplitRow(splitRow); + + client.createTable(testTableName, schema, createOptions); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + KuduTable table = client.openTable(testTableName); + for (int i = 0; i < 100; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", String.format("key_%02d", i)); + row.addString("c1", "c1_" + i); + row.addString("c2", "c2_" + i); + session.apply(insert); + } + session.flush(); + + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + tokenBuilder.batchSizeBytes(0); + tokenBuilder.setProjectedColumnIndexes(ImmutableList.of()); + List tokens = tokenBuilder.build(); + assertEquals(16, tokens.size()); + + // KUDU-1809, with batchSizeBytes configured to '0', + // the first call to the tablet server won't return + // any data. + { + KuduScanner scanner = tokens.get(0).intoScanner(client); + assertEquals(0, scanner.nextRows().getNumRows()); + } + + for (KuduScanToken token : tokens) { + // Sanity check to make sure the debug printing does not throw. + LOG.debug(KuduScanToken.stringifySerializedToken(token.serialize(), client)); + } + } finally { + AsyncKuduClient.FETCH_TABLETS_PER_RANGE_LOOKUP = saveFetchTablets; + } + } + + @Test + public void testScanTokenWithQueryId() throws Exception { + // Prepare the table for testing. + Schema schema = createManyStringsSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + final int buckets = 8; + createOptions.addHashPartitions(ImmutableList.of("key"), buckets); + client.createTable(testTableName, schema, createOptions); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(testTableName); + final int totalRows = 100; + for (int i = 0; i < totalRows; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", String.format("key_%02d", i)); + row.addString("c1", "c1_" + i); + row.addString("c2", "c2_" + i); + assertEquals(session.apply(insert).hasRowError(), false); + } + // Scan with specified query id. + { + int rowsScanned = 0; + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + tokenBuilder.setProjectedColumnIndexes(ImmutableList.of()); + tokenBuilder.setQueryId("query-id-for-test"); + List tokens = tokenBuilder.build(); + assertEquals(buckets, tokens.size()); + for (int i = 0; i < tokens.size(); i++) { + KuduScanner scanner = tokens.get(i).intoScanner(client); + while (scanner.hasMoreRows()) { + rowsScanned += scanner.nextRows().getNumRows(); + } + } + assertEquals(totalRows, rowsScanned); + } + // Scan with default query id. 
+ { + int rowsScanned = 0; + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + tokenBuilder.setProjectedColumnIndexes(ImmutableList.of()); + List tokens = tokenBuilder.build(); + assertEquals(buckets, tokens.size()); + for (int i = 0; i < tokens.size(); i++) { + KuduScanner scanner = tokens.get(i).intoScanner(client); + while (scanner.hasMoreRows()) { + rowsScanned += scanner.nextRows().getNumRows(); + } + } + assertEquals(totalRows, rowsScanned); + } + } + + /** + * Regression test for KUDU-3349 + */ + @Test + public void testScanTokenWithWrongUuidSerialization() throws Exception { + // Prepare the table for testing. + Schema schema = createManyStringsSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + final int buckets = 8; + createOptions.addHashPartitions(ImmutableList.of("key"), buckets); + client.createTable(testTableName, schema, createOptions); + + KuduSession session = client.newSession(); + KuduTable table = client.openTable(testTableName); + final int totalRows = 100; + for (int i = 0; i < totalRows; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", String.format("key_%02d", i)); + row.addString("c1", "c1_" + i); + row.addString("c2", "c2_" + i); + assertEquals(session.apply(insert).hasRowError(), false); + } + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + tokenBuilder.setProjectedColumnIndexes(ImmutableList.of()); + List tokens = tokenBuilder.build(); + assertEquals(buckets, tokens.size()); + + // Create a new client, open the newly created kudu table, and new scanners. + AsyncKuduClient newAsyncClient = new AsyncKuduClient.AsyncKuduClientBuilder( + harness.getMasterAddressesAsString()) + .build(); + KuduClient newClient = newAsyncClient.syncClient(); + KuduTable newTable = newClient.openTable(testTableName); + List kuduScanners = new ArrayList<>(buckets); + List tabletIds = new ArrayList<>(buckets); + for (KuduScanToken token : tokens) { + tabletIds.add(new String(token.getTablet().getTabletId(), + java.nio.charset.StandardCharsets.UTF_8)); + KuduScanner kuduScanner = token.intoScanner(newAsyncClient.syncClient()); + kuduScanners.add(kuduScanner); + } + + // Step down all tablet leaders. + KuduBinaryLocator.ExecutableInfo exeInfo = null; + try { + exeInfo = KuduBinaryLocator.findBinary("kudu"); + } catch (FileNotFoundException e) { + LOG.error(e.getMessage()); + fail(); + } + for (String tabletId : tabletIds) { + List commandLine = Lists.newArrayList(exeInfo.exePath(), + "tablet", + "leader_step_down", + harness.getMasterAddressesAsString(), + tabletId); + ProcessBuilder processBuilder = new ProcessBuilder(commandLine); + processBuilder.environment().putAll(exeInfo.environment()); + // Step down the tablet leaders one by one after a fix duration. + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + LOG.error(e.getMessage()); + } + } + // Delete all rows first through the new client. + KuduSession newSession = newClient.newSession(); + + for (int i = 0; i < totalRows; i++) { + Operation del = newTable.newDelete(); + PartialRow row = del.getRow(); + row.addString("key", String.format("key_%02d", i)); + del.setRow(row); + OperationResponse response = newSession.apply(del); + assertEquals(response.hasRowError(), false); + } + + // Insert all rows again through the new client. 
+ for (int i = 0; i < totalRows; i++) { + Insert insert = newTable.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", String.format("key_%02d", i)); + row.addString("c1", "c1_" + i); + row.addString("c2", "c2_" + i); + assertEquals(newSession.apply(insert).hasRowError(), false); + } + + // Verify all the row count. + int rowCount = 0; + for (KuduScanner kuduScanner : kuduScanners) { + while (kuduScanner.hasMoreRows()) { + rowCount += kuduScanner.nextRows().numRows; + } + } + assertEquals(totalRows, rowCount); + } + + /** + * Tests scan token creation and execution on a table with non-covering range partitions. + */ + @Test + public void testScanTokensNonCoveringRangePartitions() throws Exception { + Schema schema = createManyStringsSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.addHashPartitions(ImmutableList.of("key"), 2); + + PartialRow lower = schema.newPartialRow(); + PartialRow upper = schema.newPartialRow(); + lower.addString("key", "a"); + upper.addString("key", "f"); + createOptions.addRangePartition(lower, upper); + + lower = schema.newPartialRow(); + upper = schema.newPartialRow(); + lower.addString("key", "h"); + upper.addString("key", "z"); + createOptions.addRangePartition(lower, upper); + + PartialRow split = schema.newPartialRow(); + split.addString("key", "k"); + createOptions.addSplitRow(split); + + client.createTable(testTableName, schema, createOptions); + + KuduSession session = client.newSession(); + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + KuduTable table = client.openTable(testTableName); + for (char c = 'a'; c < 'f'; c++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", "" + c); + row.addString("c1", "c1_" + c); + row.addString("c2", "c2_" + c); + session.apply(insert); + } + for (char c = 'h'; c < 'z'; c++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString("key", "" + c); + row.addString("c1", "c1_" + c); + row.addString("c2", "c2_" + c); + session.apply(insert); + } + session.flush(); + + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + tokenBuilder.setProjectedColumnIndexes(ImmutableList.of()); + List tokens = tokenBuilder.build(); + assertEquals(6, tokens.size()); + assertEquals('f' - 'a' + 'z' - 'h', + countScanTokenRows(tokens, + client.getMasterAddressesAsString(), + client.getDefaultOperationTimeoutMs())); + + for (KuduScanToken token : tokens) { + // Sanity check to make sure the debug printing does not throw. + LOG.debug(KuduScanToken.stringifySerializedToken(token.serialize(), client)); + } + } + + /** + * Tests the results of creating scan tokens, altering the columns being + * scanned, and then executing the scan tokens. 
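+ *
+ * Tokens built before the alter are expected to fail to rehydrate: dropping the
+ * column removes it from the schema, and re-adding a column with the same name and
+ * type still assigns a new column id, so the old token no longer matches.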
+ */ + @Test + public void testScanTokensConcurrentAlterTable() throws Exception { + Schema schema = new Schema(ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("key", Type.INT64).nullable(false).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("a", Type.INT64).nullable(false).key(false).build() + )); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(1); + client.createTable(testTableName, schema, createOptions); + + KuduTable table = client.openTable(testTableName); + + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + List tokens = tokenBuilder.includeTableMetadata(false).build(); + List tokensWithMetadata = tokenBuilder.includeTableMetadata(true).build(); + assertEquals(1, tokens.size()); + assertEquals(1, tokensWithMetadata.size()); + KuduScanToken token = tokens.get(0); + KuduScanToken tokenWithMetadata = tokensWithMetadata.get(0); + + // Drop a column + client.alterTable(testTableName, new AlterTableOptions().dropColumn("a")); + try { + token.intoScanner(client); + fail(); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Unknown column")); + } + try { + KuduScanner scanner = tokenWithMetadata.intoScanner(client); + countRowsInScan(scanner); + fail(); + } catch (KuduException e) { + assertTrue(e.getMessage().contains("Some columns are not present in the current schema: a")); + } + + // Add a column with the same name, type, and nullability. It will have a different id-- it's a + // different column-- so the scan token will fail. + client.alterTable( + testTableName, + new AlterTableOptions() + .addColumn(new ColumnSchema.ColumnSchemaBuilder("a", Type.INT64) + .nullable(false) + .defaultValue(0L).build())); + try { + token.intoScanner(client); + fail(); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains( + "Unknown column")); + } + } + + /** + * Tests that it is possible to create a scan token, rename a column, and rehydrate a scanner from + * the scan token with the old column name. + */ + @Test + public void testScanTokensConcurrentColumnRename() throws Exception { + Schema schema = getBasicSchema(); + final String oldColName = schema.getColumnByIndex(1).getName(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(1); + client.createTable(testTableName, schema, createOptions); + + KuduTable table = client.openTable(testTableName); + + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + // TODO(KUDU-3146): Disable including the table metadata so the new column name is retrieved + // when deserializing the scanner. + tokenBuilder.includeTableMetadata(false); + List tokens = tokenBuilder.build(); + assertEquals(1, tokens.size()); + KuduScanToken token = tokens.get(0); + + // Rename a column. + String newColName = "new-name"; + client.alterTable(testTableName, new AlterTableOptions().renameColumn(oldColName, newColName)); + + KuduScanner scanner = token.intoScanner(client); + + // TODO(KUDU-3146): Handle renaming a column between when the token is rehydrated as a scanner + // and when the scanner first hits a replica. Note that this is almost certainly a very + // short period of vulnerability. + + assertEquals(0, countRowsInScan(scanner)); + + // Test that the old name cannot be used and the new name can be. 
+ Schema alteredSchema = scanner.getProjectionSchema(); + try { + alteredSchema.getColumn(oldColName); + fail(); + } catch (IllegalArgumentException ex) { + // Good. + } + alteredSchema.getColumn(newColName); + } + + /** + * Tests that it is possible to rehydrate a scan token after a table rename. + */ + @Test + public void testScanTokensWithTableRename() throws Exception { + Schema schema = getBasicSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(1); + KuduTable table = client.createTable(testTableName, schema, createOptions); + + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + List tokens = tokenBuilder.build(); + assertEquals(1, tokens.size()); + KuduScanToken token = tokens.get(0); + + // Rename the table. + client.alterTable( + testTableName, + new AlterTableOptions().renameTable(testTableName + "-renamed")); + + assertEquals(0, countRowsInScan(token.intoScanner(client))); + } + + /** + * Tests scan token creation and execution on a table with interleaved range partition drops. + */ + @Test + public void testScanTokensInterleavedRangePartitionDrops() throws Exception { + Schema schema = getBasicSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.addHashPartitions(ImmutableList.of("key"), 2); + + final int numRows = 30; + PartialRow lower0 = schema.newPartialRow(); + PartialRow upper0 = schema.newPartialRow(); + lower0.addInt("key", 0); + upper0.addInt("key", numRows / 3); + createOptions.addRangePartition(lower0, upper0); + + PartialRow lower1 = schema.newPartialRow(); + PartialRow upper1 = schema.newPartialRow(); + lower1.addInt("key", numRows / 3); + upper1.addInt("key", 2 * numRows / 3); + createOptions.addRangePartition(lower1, upper1); + + PartialRow lower2 = schema.newPartialRow(); + PartialRow upper2 = schema.newPartialRow(); + lower2.addInt("key", 2 * numRows / 3); + upper2.addInt("key", numRows); + createOptions.addRangePartition(lower2, upper2); + + KuduTable table = client.createTable(testTableName, schema, createOptions); + KuduSession session = client.newSession(); + for (int i = 0; i < numRows; i++) { + session.apply(createBasicSchemaInsert(table, i)); + } + + // Build the scan tokens. + List tokens = client.newScanTokenBuilder(table).build(); + assertEquals(6, tokens.size()); + + // Drop the range partition [10, 20). + AlterTableOptions dropMiddleOptions = new AlterTableOptions(); + dropMiddleOptions.dropRangePartition(lower1, upper1); + client.alterTable(table.getName(), dropMiddleOptions); + + // Rehydrate the tokens. + List scanners = new ArrayList<>(); + for (KuduScanToken token : tokens) { + scanners.add(token.intoScanner(client)); + } + + // Drop the range partition [20, 30). + AlterTableOptions dropEndOptions = new AlterTableOptions(); + dropEndOptions.dropRangePartition(lower2, upper2); + client.alterTable(table.getName(), dropEndOptions); + + // Check the scanners work. The scanners for the tablets in the range [10, 20) definitely won't + // see any rows. The scanners for the tablets in the range [20, 30) might see rows. 
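+ // Hence the assertions below only bound the surviving row count between
+ // numRows / 3 and 2 * numRows / 3 instead of expecting an exact value.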
+ int scannedRows = 0; + for (KuduScanner scanner : scanners) { + scannedRows += countRowsInScan(scanner); + } + assertTrue(String.format("%d >= %d / 3?", scannedRows, numRows), + scannedRows >= numRows / 3); + assertTrue(String.format("%d <= 2 * %d / 3?", scannedRows, numRows), + scannedRows <= 2 * numRows / 3); + } + + /** + * Test that scanRequestTimeout makes it from the scan token to the underlying Scanner class. + */ + @Test + public void testScanRequestTimeout() throws IOException { + final int NUM_ROWS_DESIRED = 100; + final int SCAN_REQUEST_TIMEOUT_MS = 20; + KuduTable table = createDefaultTable(client, testTableName); + loadDefaultTable(client, testTableName, NUM_ROWS_DESIRED); + KuduScanToken.KuduScanTokenBuilder builder = + new KuduScanToken.KuduScanTokenBuilder(asyncClient, table); + builder.scanRequestTimeout(SCAN_REQUEST_TIMEOUT_MS); + List tokens = builder.build(); + for (KuduScanToken token : tokens) { + byte[] serialized = token.serialize(); + KuduScanner scanner = KuduScanToken.deserializeIntoScanner(serialized, client); + assertEquals(SCAN_REQUEST_TIMEOUT_MS, scanner.getScanRequestTimeout()); + } + } + + // Helper for scan token tests that use diff scan. + private long setupTableForDiffScans(KuduClient client, + KuduTable table, + int numRows) throws Exception { + KuduSession session = client.newSession(); + for (int i = 0; i < numRows / 2; i++) { + session.apply(createBasicSchemaInsert(table, i)); + } + + // Grab the timestamp, then add more data so there's a diff. + long timestamp = client.getLastPropagatedTimestamp(); + for (int i = numRows / 2; i < numRows; i++) { + session.apply(createBasicSchemaInsert(table, i)); + } + // Delete some data so the is_deleted column can be tested. + for (int i = 0; i < numRows / 4; i++) { + Delete delete = table.newDelete(); + PartialRow row = delete.getRow(); + row.addInt(0, i); + session.apply(delete); + } + + return timestamp; + } + + // Helper to check diff scan results. + private void checkDiffScanResults(KuduScanner scanner, + int numExpectedMutations, + int numExpectedDeletes) throws KuduException { + int numMutations = 0; + int numDeletes = 0; + while (scanner.hasMoreRows()) { + for (RowResult rowResult : scanner.nextRows()) { + numMutations++; + if (rowResult.isDeleted()) { + numDeletes++; + } + } + } + assertEquals(numExpectedMutations, numMutations); + assertEquals(numExpectedDeletes, numDeletes); + } + + /** + * Test that scan tokens work with diff scans. + */ + @Test + public void testDiffScanTokens() throws Exception { + Schema schema = getBasicSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(1); + KuduTable table = client.createTable(testTableName, schema, createOptions); + + // Set up the table for a diff scan. + int numRows = 20; + long timestamp = setupTableForDiffScans(client, table, numRows); + + // Since the diff scan interval is [start, end), increment the start timestamp to exclude + // the last row inserted in the first group of ops, and increment the end timestamp to include + // the last row deleted in the second group of ops. + List tokens = client.newScanTokenBuilder(table) + // TODO(KUDU-3146): Disable including the table metadata so the new column name is + // retrieved when deserializing the scanner. 
+ .includeTableMetadata(false) + .diffScan(timestamp + 1, client.getLastPropagatedTimestamp() + 1) + .build(); + assertEquals(1, tokens.size()); + + checkDiffScanResults(tokens.get(0).intoScanner(client), 3 * numRows / 4, numRows / 4); + } + + /** + * Test that scan tokens work with diff scans even when columns are renamed. + */ + @Test + public void testDiffScanTokensConcurrentColumnRename() throws Exception { + Schema schema = getBasicSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(1); + KuduTable table = client.createTable(testTableName, schema, createOptions); + + // Set up the table for a diff scan. + int numRows = 20; + long timestamp = setupTableForDiffScans(client, table, numRows); + + // Since the diff scan interval is [start, end), increment the start timestamp to exclude + // the last row inserted in the first group of ops, and increment the end timestamp to include + // the last row deleted in the second group of ops. + List tokens = client.newScanTokenBuilder(table) + // TODO(KUDU-3146): Disable including the table metadata so the new column name is + // retrieved when deserializing the scanner. + .includeTableMetadata(false) + .diffScan(timestamp + 1, client.getLastPropagatedTimestamp() + 1) + .build(); + assertEquals(1, tokens.size()); + + // Rename a column between when the token is created and when it is rehydrated into a scanner + client.alterTable(table.getName(), + new AlterTableOptions().renameColumn("column1_i", "column1_i_new")); + + KuduScanner scanner = tokens.get(0).intoScanner(client); + + // TODO(KUDU-3146): Handle renaming a column between when the token is rehydrated as a scanner + // and when the scanner first hits a replica. Note that this is almost certainly a very + // short period of vulnerability. + + checkDiffScanResults(scanner, 3 * numRows / 4, numRows / 4); + } + + @Test + public void testScanTokenRequestsWithMetadata() throws Exception { + Schema schema = getBasicSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(1); + KuduTable table = client.createTable(testTableName, schema, createOptions); + + // Use a new client to simulate hydrating in a new process. + KuduClient newClient = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build(); + newClient.getTablesList(); // List the tables to prevent counting initialization RPCs. + // Ensure the client doesn't have an authorization token for the table. + assertNull(newClient.asyncClient.getAuthzTokenCache().get(table.getTableId())); + + KuduMetrics.logMetrics(); // Log the metric values to help debug failures. + final long beforeRequests = totalRequestCount(); + + // Validate that building a scan token results in a single GetTableLocations request. + KuduScanToken token = validateRequestCount(1, client.getClientId(), + "GetTableLocations", () -> { + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + List tokens = tokenBuilder.includeTableMetadata(true).build(); + assertEquals(1, tokens.size()); + return tokens.get(0); + }); + + // Validate that hydrating a token doesn't result in a request. + KuduScanner scanner = validateRequestCount(0, newClient.getClientId(), + () -> token.intoScanner(newClient)); + // Ensure the client now has an authorization token. 
+ assertNotNull(newClient.asyncClient.getAuthzTokenCache().get(table.getTableId())); + + // Validate that starting to scan results in a Scan request. + validateRequestCount(1, newClient.getClientId(), "Scan", + scanner::nextRows); + + final long afterRequests = totalRequestCount(); + + // Validate no other unexpected requests were sent. + // GetTableLocations, Scan. + KuduMetrics.logMetrics(); // Log the metric values to help debug failures. + assertEquals(2, afterRequests - beforeRequests); + } + + @Test + public void testScanTokenRequestsNoMetadata() throws Exception { + Schema schema = getBasicSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(1); + KuduTable table = client.createTable(testTableName, schema, createOptions); + + // Use a new client to simulate hydrating in a new process. + KuduClient newClient = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build(); + newClient.getTablesList(); // List the tables to prevent counting initialization RPCs. + + KuduMetrics.logMetrics(); // Log the metric values to help debug failures. + long beforeRequests = totalRequestCount(); + + // Validate that building a scan token results in a single GetTableLocations request. + KuduScanToken token = validateRequestCount(1, client.getClientId(), + "GetTableLocations", () -> { + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + List tokens = tokenBuilder + .includeTableMetadata(false) + .includeTabletMetadata(false) + .build(); + assertEquals(1, tokens.size()); + return tokens.get(0); + }); + + // Validate that hydrating a token into a scanner results in a single GetTableSchema request. + KuduScanner scanner = validateRequestCount(1, newClient.getClientId(), "GetTableSchema", + () -> token.intoScanner(newClient)); + + // Validate that starting to scan results in a GetTableLocations request and a Scan request. + validateRequestCount(2, newClient.getClientId(), Arrays.asList("GetTableLocations", "Scan"), + scanner::nextRows); + + long afterRequests = totalRequestCount(); + + // Validate no other unexpected requests were sent. + // GetTableLocations x 2, GetTableSchema, Scan. + KuduMetrics.logMetrics(); // Log the metric values to help debug failures. 
+ assertEquals(4, afterRequests - beforeRequests); + } + + @Test + public void testScanTokenSize() throws Exception { + List columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT8).key(true).build()); + for (int i = 0; i < 100; i++) { + columns.add(new ColumnSchema.ColumnSchemaBuilder("int64-" + i, Type.INT64).build()); + } + Schema schema = new Schema(columns); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(1); + KuduTable table = client.createTable(testTableName, schema, createOptions); + + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + List tokens = tokenBuilder + .includeTabletMetadata(false) + .includeTableMetadata(false) + .build(); + assertEquals(1, tokens.size()); + final byte[] tokenBytes = tokens.get(0).serialize(); + + List tokensWithTabletMetadata = tokenBuilder + .includeTabletMetadata(true) + .includeTableMetadata(false) + .build(); + assertEquals(1, tokensWithTabletMetadata.size()); + final byte[] tokenWithTabletMetadataBytes = tokensWithTabletMetadata.get(0).serialize(); + + List tokensWithTableMetadata = tokenBuilder + .includeTabletMetadata(false) + .includeTableMetadata(true) + .build(); + assertEquals(1, tokensWithTabletMetadata.size()); + final byte[] tokenWithTableMetadataBytes = tokensWithTableMetadata.get(0).serialize(); + + List tokensWithAllMetadata = tokenBuilder + .includeTabletMetadata(true) + .includeTableMetadata(true) + .build(); + assertEquals(1, tokensWithAllMetadata.size()); + final byte[] tokenWithAllMetadataBytes = tokensWithAllMetadata.get(0).serialize(); + + LOG.info("tokenBytes: " + tokenBytes.length); + LOG.info("tokenWithTabletMetadataBytes: " + tokenWithTabletMetadataBytes.length); + LOG.info("tokenWithTableMetadataBytes: " + tokenWithTableMetadataBytes.length); + LOG.info("tokenWithAllMetadataBytes: " + tokenWithAllMetadataBytes.length); + + assertTrue(tokenWithAllMetadataBytes.length > tokenWithTableMetadataBytes.length); + assertTrue(tokenWithTableMetadataBytes.length > tokenWithTabletMetadataBytes.length); + assertTrue(tokenWithTabletMetadataBytes.length > tokenBytes.length); + } + + @Test + public void testScanTokensWithExtraPredicate() throws IOException { + final int NUM_ROWS_DESIRED = 100; + final int PREDICATE_INDEX = 0; + final int PREDICATE_VAL = 1; + KuduTable table = createDefaultTable(client, testTableName); + loadDefaultTable(client, testTableName, NUM_ROWS_DESIRED); + KuduScanToken.KuduScanTokenBuilder builder = + new KuduScanToken.KuduScanTokenBuilder(asyncClient, table); + List tokens = builder.build(); + ColumnSchema cs = table.getSchema().getColumnByIndex(PREDICATE_INDEX); + KuduPredicate predicate = KuduPredicate.newComparisonPredicate( + cs, KuduPredicate.ComparisonOp.EQUAL, PREDICATE_VAL); + Set resultKeys = new HashSet<>(); + for (KuduScanToken token : tokens) { + byte[] serialized = token.serialize(); + KuduScanner.KuduScannerBuilder scannerBuilder = KuduScanToken.deserializeIntoScannerBuilder( + serialized, client); + scannerBuilder.addPredicate(predicate); + KuduScanner scanner = scannerBuilder.build(); + for (RowResult rowResult : scanner) { + resultKeys.add(rowResult.getInt(PREDICATE_INDEX)); + } + } + assertEquals(1, resultKeys.size()); + assertEquals(PREDICATE_VAL, Iterables.getOnlyElement(resultKeys).intValue()); + } + + /** + * Verify the deserialization of RemoteTablet from KuduScanToken. + * Regression test for KUDU-3349. 
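+ *
+ * The serialized token is parsed back into a ScanTokenPB, its tablet metadata is
+ * rebuilt into a RemoteTablet, and the resulting tablet server locations are
+ * checked against the servers of the running cluster.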
+ */ + @Test + public void testRemoteTabletVerification() throws IOException { + final int NUM_ROWS_DESIRED = 100; + KuduTable table = createDefaultTable(client, testTableName); + loadDefaultTable(client, testTableName, NUM_ROWS_DESIRED); + KuduScanToken.KuduScanTokenBuilder builder = + new KuduScanToken.KuduScanTokenBuilder(asyncClient, table); + List tokens = builder.build(); + List tservers = harness.getTabletServers(); + for (KuduScanToken token : tokens) { + byte[] serialized = token.serialize(); + Client.ScanTokenPB scanTokenPB = + Client.ScanTokenPB.parseFrom(CodedInputStream.newInstance(serialized)); + Client.TabletMetadataPB tabletMetadata = scanTokenPB.getTabletMetadata(); + Partition partition = + ProtobufHelper.pbToPartition(tabletMetadata.getPartition()); + RemoteTablet remoteTablet = KuduScanToken.newRemoteTabletFromTabletMetadata(tabletMetadata, + table.getTableId(), partition); + for (ServerInfo si : remoteTablet.getTabletServersCopy()) { + assertEquals(si.getUuid().length(), 32); + HostAndPort hostAndPort = si.getHostAndPort(); + assertEquals(tservers.contains(hostAndPort), true); + } + } + } + + /** + * Regression test for KUDU-3205. + */ + @Test + public void testBuildTokensWithDownTabletServer() throws Exception { + Schema schema = getBasicSchema(); + CreateTableOptions createOptions = new CreateTableOptions(); + createOptions.setRangePartitionColumns(ImmutableList.of()); + createOptions.setNumReplicas(3); + KuduTable table = client.createTable(testTableName, schema, createOptions); + + // Insert a row. + KuduSession session = client.newSession(); + Insert insert = createBasicSchemaInsert(table, 1); + session.apply(insert); + session.close(); + + // Remove a tablet server from the remote tablet by calling `removeTabletClient`. + // This is done in normal applications via AsyncKuduClient.invalidateTabletCache + // when a tablet not found error is handled. + TableLocationsCache.Entry entry = + asyncClient.getTableLocationEntry(table.getTableId(), insert.partitionKey()); + RemoteTablet remoteTablet = entry.getTablet(); + List tabletServers = remoteTablet.getTabletServersCopy(); + remoteTablet.removeTabletClient(tabletServers.get(0).getUuid()); + + // Ensure we can build and use the token without an error. + KuduScanToken.KuduScanTokenBuilder tokenBuilder = client.newScanTokenBuilder(table); + tokenBuilder.includeTableMetadata(true); + tokenBuilder.includeTabletMetadata(true); + List tokens = tokenBuilder.build(); + assertEquals(1, tokens.size()); + + // Use a new client to simulate hydrating in a new process. 
+ KuduClient newClient = + new KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build(); + KuduScanner scanner = tokens.get(0).intoScanner(newClient); + assertEquals(1, countRowsInScan(scanner)); + } + + @Test + public void testScannerBuilderFaultToleranceToggle() throws IOException { + KuduTable table = createDefaultTable(client, testTableName); + KuduScanner.KuduScannerBuilder scannerBuilder = + new KuduScanner.KuduScannerBuilder(asyncClient, table); + assertFalse(scannerBuilder.isFaultTolerant); + assertEquals(AsyncKuduScanner.ReadMode.READ_LATEST, scannerBuilder.readMode); + + scannerBuilder.setFaultTolerant(true); + assertTrue(scannerBuilder.isFaultTolerant); + assertEquals(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT, scannerBuilder.readMode); + + scannerBuilder.setFaultTolerant(false); + assertFalse(scannerBuilder.isFaultTolerant); + assertEquals(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT, scannerBuilder.readMode); + + scannerBuilder.readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES); + assertFalse(scannerBuilder.isFaultTolerant); + assertEquals(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES, scannerBuilder.readMode); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScannerMultiTablet.java b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScannerMultiTablet.java new file mode 100644 index 0000000000..03c700ac06 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestScannerMultiTablet.java @@ -0,0 +1,515 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.client; + +import static org.apache.kudu.Type.STRING; +import static org.apache.kudu.test.ClientTestUtil.countRowsInScan; +import static org.apache.kudu.test.KuduTestHarness.DEFAULT_SLEEP; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.stumbleupon.async.Deferred; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Common; +import org.apache.kudu.Schema; +import org.apache.kudu.client.Client.ScanTokenPB; +import org.apache.kudu.test.KuduTestHarness; + +public class TestScannerMultiTablet { + // Generate a unique table name + private static final String TABLE_NAME = + TestScannerMultiTablet.class.getName() + "-" + System.currentTimeMillis(); + + private static Schema schema = getSchema(); + + /** + * The timestamp after inserting the rows into the test table during setUp(). 
+ */ + private long beforeWriteTimestamp; + private KuduTable table; + private KuduClient client; + private AsyncKuduClient asyncClient; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + @Before + public void setUp() throws Exception { + // create a 4-tablets table for scanning + CreateTableOptions builder = + new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("key1", "key2")); + + for (int i = 1; i < 4; i++) { + PartialRow splitRow = schema.newPartialRow(); + splitRow.addString("key1", "" + i); + splitRow.addString("key2", ""); + builder.addSplitRow(splitRow); + } + + harness.getClient().createTable(TABLE_NAME, schema, builder); + + KuduTable insertTable = harness.getClient().openTable(TABLE_NAME); + AsyncKuduSession session = harness.getAsyncClient().newSession(); + session.setFlushMode(AsyncKuduSession.FlushMode.AUTO_FLUSH_SYNC); + + // The data layout ends up like this: + // tablet '', '1': no rows + // tablet '1', '2': '111', '122', '133' + // tablet '2', '3': '211', '222', '233' + // tablet '3', '': '311', '322', '333' + String[] keys = new String[] {"1", "2", "3"}; + for (String key1 : keys) { + for (String key2 : keys) { + Insert insert = insertTable.newInsert(); + PartialRow row = insert.getRow(); + row.addString(0, key1); + row.addString(1, key2); + row.addString(2, key2); + Deferred d = session.apply(insert); + d.join(DEFAULT_SLEEP); + } + } + + beforeWriteTimestamp = harness.getAsyncClient().getLastPropagatedTimestamp(); + + // Reset the clients in order to clear the propagated timestamp, which may + // have been set if other test cases ran before this one. This ensures + // that all tests set their own state. + harness.resetClients(); + // Reopen the table using the reset client. + table = harness.getClient().openTable(TABLE_NAME); + client = harness.getClient(); + asyncClient = harness.getAsyncClient(); + } + + private void validateResourceMetrics(ResourceMetrics resourceMetrics) { + assertTrue("queue_duration_nanos > 0", + resourceMetrics.getMetric("queue_duration_nanos") > 0L); + assertTrue("total_duration_nanos > 0", + resourceMetrics.getMetric("total_duration_nanos") > 0L); + } + + // Test scanner resource metrics. + @Test(timeout = 100000) + public void testResourceMetrics() throws Exception { + // Scan one tablet and the whole table. + AsyncKuduScanner oneTabletScanner = getScanner("1", "1", "1", "4"); // Whole second tablet. + assertEquals(3, countRowsInScan(oneTabletScanner)); + AsyncKuduScanner fullTableScanner = getScanner(null, null, null, null); + assertEquals(9, countRowsInScan(fullTableScanner)); + // Both scans should take a positive amount of wait duration, total duration, cpu user and cpu + // system time + validateResourceMetrics(oneTabletScanner.getResourceMetrics()); + validateResourceMetrics(fullTableScanner.getResourceMetrics()); + } + + // Test various combinations of start/end row keys. 
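+ // The expected counts follow from the data layout documented in setUp():
+ // the first tablet is empty, and each of the other three tablets holds three rows.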
+ @Test(timeout = 100000) + public void testKeyStartEnd() throws Exception { + assertEquals(0, + countRowsInScan(getScanner("", "", "1", ""))); // There's nothing in the 1st tablet + assertEquals(1, countRowsInScan(getScanner("", "", "1", "2"))); // Grab the very first row + assertEquals(3, countRowsInScan(getScanner("1", "1", "1", "4"))); // Grab the whole 2nd tablet + assertEquals(3, countRowsInScan(getScanner("1", "1", "2", ""))); // Same, and peek at the 3rd + assertEquals(3, countRowsInScan(getScanner("1", "1", "2", "0"))); // Same, different peek + assertEquals(4, + countRowsInScan(getScanner("1", "2", "2", "3"))); // Middle of 2nd to middle of 3rd + assertEquals(3, + countRowsInScan(getScanner("1", "4", "2", "4"))); // Peek at the 2nd then whole 3rd + assertEquals(6, countRowsInScan(getScanner("1", "5", "3", "4"))); // Whole 3rd and 4th + assertEquals(9, countRowsInScan(getScanner("", "", "4", ""))); // Full table scan + + assertEquals(9, + countRowsInScan(getScanner("", "", null, null))); // Full table scan with empty upper + assertEquals(9, + countRowsInScan(getScanner(null, null, "4", ""))); // Full table scan with empty lower + assertEquals(9, + countRowsInScan(getScanner(null, null, null, null))); // Full table scan with empty bounds + + // Test that we can close a scanner while in between two tablets. We start on the second + // tablet and our first nextRows() will get 3 rows. At that moment we want to close the scanner + // before getting on the 3rd tablet. + AsyncKuduScanner scanner = getScanner("1", "", null, null); + Deferred d = scanner.nextRows(); + RowResultIterator rri = d.join(DEFAULT_SLEEP); + assertEquals(3, rri.getNumRows()); + d = scanner.close(); + rri = d.join(DEFAULT_SLEEP); + assertNull(rri); + } + + // Test mixing start/end row keys with predicates. + @Test(timeout = 100000) + @SuppressWarnings("deprecation") + public void testKeysAndPredicates() throws Exception { + // Value that doesn't exist, predicates has primary column + ColumnRangePredicate predicate = new ColumnRangePredicate(schema.getColumnByIndex(1)); + predicate.setUpperBound("1"); + assertEquals(0, countRowsInScan(getScanner("1", "2", "1", "3", predicate))); + + // First row from the 2nd tablet. + predicate = new ColumnRangePredicate(schema.getColumnByIndex(2)); + predicate.setLowerBound("1"); + predicate.setUpperBound("1"); + assertEquals(1, countRowsInScan(getScanner("1", "", "2", "", predicate))); + + // All the 2nd tablet. + predicate = new ColumnRangePredicate(schema.getColumnByIndex(2)); + predicate.setLowerBound("1"); + predicate.setUpperBound("3"); + assertEquals(3, countRowsInScan(getScanner("1", "", "2", "", predicate))); + + // Value that doesn't exist. + predicate = new ColumnRangePredicate(schema.getColumnByIndex(2)); + predicate.setLowerBound("4"); + assertEquals(0, countRowsInScan(getScanner("1", "", "2", "", predicate))); + + // First row from every tablet. + predicate = new ColumnRangePredicate(schema.getColumnByIndex(2)); + predicate.setLowerBound("1"); + predicate.setUpperBound("1"); + assertEquals(3, countRowsInScan(getScanner(null, null, null, null, predicate))); + + // All the rows. + predicate = new ColumnRangePredicate(schema.getColumnByIndex(2)); + predicate.setLowerBound("1"); + assertEquals(9, countRowsInScan(getScanner(null, null, null, null, predicate))); + } + + @Test(timeout = 100000) + public void testProjections() throws Exception { + // Test with column names. 
+ AsyncKuduScanner.AsyncKuduScannerBuilder builder = asyncClient.newScannerBuilder(table); + builder.setProjectedColumnNames(Lists.newArrayList(schema.getColumnByIndex(0).getName(), + schema.getColumnByIndex(1).getName())); + buildScannerAndCheckColumnsCount(builder, 2); + + // Test with column indexes. + builder = asyncClient.newScannerBuilder(table); + builder.setProjectedColumnIndexes(Lists.newArrayList(0, 1)); + buildScannerAndCheckColumnsCount(builder, 2); + + // Test with column names overriding indexes. + builder = asyncClient.newScannerBuilder(table); + builder.setProjectedColumnIndexes(Lists.newArrayList(0, 1)); + builder.setProjectedColumnNames(Lists.newArrayList(schema.getColumnByIndex(0).getName())); + buildScannerAndCheckColumnsCount(builder, 1); + + // Test with keys last with indexes. + builder = asyncClient.newScannerBuilder(table); + builder.setProjectedColumnIndexes(Lists.newArrayList(2, 1, 0)); + buildScannerAndCheckColumnsCount(builder, 3); + + // Test with keys last with column names. + builder = asyncClient.newScannerBuilder(table); + builder.setProjectedColumnNames(Lists.newArrayList(schema.getColumnByIndex(2).getName(), + schema.getColumnByIndex(0).getName())); + buildScannerAndCheckColumnsCount(builder, 2); + } + + @Test(timeout = 100000) + public void testReplicaSelections() throws Exception { + AsyncKuduScanner scanner = asyncClient.newScannerBuilder(table) + .replicaSelection(ReplicaSelection.LEADER_ONLY) + .build(); + + assertEquals(9, countRowsInScan(scanner)); + + scanner = asyncClient.newScannerBuilder(table) + .replicaSelection(ReplicaSelection.CLOSEST_REPLICA) + .build(); + + assertEquals(9, countRowsInScan(scanner)); + } + + @Test(timeout = 100000) + public void testScanTokenReplicaSelections() throws Exception { + ScanTokenPB.Builder pbBuilder = ScanTokenPB.newBuilder(); + pbBuilder.setTableName(table.getName()); + pbBuilder.setReplicaSelection(Common.ReplicaSelection.CLOSEST_REPLICA); + Client.ScanTokenPB scanTokenPB = pbBuilder.build(); + final byte[] serializedToken = KuduScanToken.serialize(scanTokenPB); + + // Deserialize the scan token into a scanner, and make sure it is using + // 'CLOSEST_REPLICA' selection policy. + KuduScanner scanner = KuduScanToken.deserializeIntoScanner(serializedToken, client); + assertEquals(ReplicaSelection.CLOSEST_REPLICA, scanner.getReplicaSelection()); + assertEquals(9, countRowsInScan(scanner)); + } + + @Test(timeout = 100000) + public void testReadAtSnapshotNoTimestamp() throws Exception { + // Perform scan in READ_AT_SNAPSHOT mode with no snapshot timestamp + // specified. Verify that the scanner timestamp is set from the tablet + // server response. + AsyncKuduScanner scanner = asyncClient.newScannerBuilder(table) + .readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT) + .build(); + assertEquals(AsyncKuduClient.NO_TIMESTAMP, scanner.getSnapshotTimestamp()); + KuduScanner syncScanner = new KuduScanner(scanner); + assertEquals(scanner.getReadMode(), syncScanner.getReadMode()); + + assertTrue(syncScanner.hasMoreRows()); + assertEquals(AsyncKuduClient.NO_TIMESTAMP, scanner.getSnapshotTimestamp()); + + int rowCount = syncScanner.nextRows().getNumRows(); + // At this point, the call to the first tablet server should have been + // done already, so check the snapshot timestamp. 
+ final long tsRef = scanner.getSnapshotTimestamp(); + assertNotEquals(AsyncKuduClient.NO_TIMESTAMP, tsRef); + + assertTrue(syncScanner.hasMoreRows()); + while (syncScanner.hasMoreRows()) { + rowCount += syncScanner.nextRows().getNumRows(); + assertEquals(tsRef, scanner.getSnapshotTimestamp()); + } + assertEquals(9, rowCount); + } + + // Regression test for KUDU-2415. + // Scanning a never-written-to tablet from a fresh client with no propagated + // timestamp in "read-your-writes' mode should not fail. + @Test(timeout = 100000) + public void testReadYourWritesFreshClientFreshTable() throws Exception { + + // Perform scan in READ_YOUR_WRITES mode. Before the scan, verify that the + // propagated timestamp is unset, since this is a fresh client. + AsyncKuduScanner scanner = asyncClient.newScannerBuilder(table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + KuduScanner syncScanner = new KuduScanner(scanner); + assertEquals(scanner.getReadMode(), syncScanner.getReadMode()); + assertEquals(AsyncKuduClient.NO_TIMESTAMP, asyncClient.getLastPropagatedTimestamp()); + assertEquals(AsyncKuduClient.NO_TIMESTAMP, scanner.getSnapshotTimestamp()); + + // Since there isn't any write performed from the client, the count + // should range from [0, 9]. + int count = countRowsInScan(syncScanner); + assertTrue(count >= 0); + assertTrue(count <= 9); + + assertNotEquals(AsyncKuduClient.NO_TIMESTAMP, asyncClient.getLastPropagatedTimestamp()); + assertNotEquals(AsyncKuduClient.NO_TIMESTAMP, scanner.getSnapshotTimestamp()); + } + + // Test multi tablets scan in READ_YOUR_WRITES mode for both AUTO_FLUSH_SYNC + // (single operation) and MANUAL_FLUSH (batches) flush modes to ensure + // client-local read-your-writes. + @Test(timeout = 100000) + public void testReadYourWrites() throws Exception { + long preTs = beforeWriteTimestamp; + + // Update the propagated timestamp to ensure we see the rows written + // in the constructor. + client.updateLastPropagatedTimestamp(preTs); + + // Perform scan in READ_YOUR_WRITES mode. Before the scan, verify that the + // scanner timestamp is not yet set. It will get set only once the scan + // is opened. + AsyncKuduScanner scanner = asyncClient.newScannerBuilder(table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + KuduScanner syncScanner = new KuduScanner(scanner); + assertEquals(scanner.getReadMode(), syncScanner.getReadMode()); + assertEquals(AsyncKuduClient.NO_TIMESTAMP, scanner.getSnapshotTimestamp()); + + assertEquals(9, countRowsInScan(syncScanner)); + + // After the scan, verify that the chosen snapshot timestamp is + // returned from the server and it is larger than the previous + // propagated timestamp. + assertNotEquals(AsyncKuduClient.NO_TIMESTAMP, scanner.getSnapshotTimestamp()); + assertTrue(preTs < scanner.getSnapshotTimestamp()); + syncScanner.close(); + + // Perform write in MANUAL_FLUSH (batch) mode. 
+ KuduSession session = client.newSession(); + session.setFlushMode(KuduSession.FlushMode.MANUAL_FLUSH); + String[] keys = new String[] {"11", "22", "33"}; + for (int i = 0; i < keys.length; i++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addString(schema.getColumnByIndex(0).getName(), keys[i]); + row.addString(schema.getColumnByIndex(1).getName(), keys[i]); + session.apply(insert); + } + session.flush(); + session.close(); + + scanner = asyncClient.newScannerBuilder(table) + .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES) + .build(); + syncScanner = new KuduScanner(scanner); + assertTrue(preTs < asyncClient.getLastPropagatedTimestamp()); + preTs = asyncClient.getLastPropagatedTimestamp(); + + assertEquals(12, countRowsInScan(syncScanner)); + + // After the scan, verify that the chosen snapshot timestamp is + // returned from the server and it is larger than the previous + // propagated timestamp. + assertTrue(preTs < scanner.getSnapshotTimestamp()); + syncScanner.close(); + } + + @Test(timeout = 100000) + public void testScanPropagatesLatestTimestamp() throws Exception { + AsyncKuduScanner scanner = asyncClient.newScannerBuilder(table).build(); + + // Initially, the client does not have the timestamp set. + assertEquals(AsyncKuduClient.NO_TIMESTAMP, asyncClient.getLastPropagatedTimestamp()); + assertEquals(KuduClient.NO_TIMESTAMP, client.getLastPropagatedTimestamp()); + KuduScanner syncScanner = new KuduScanner(scanner); + + // Check that both clients return the same propagated timestamp. + assertTrue(syncScanner.hasMoreRows()); + assertEquals(AsyncKuduClient.NO_TIMESTAMP, asyncClient.getLastPropagatedTimestamp()); + assertEquals(KuduClient.NO_TIMESTAMP, client.getLastPropagatedTimestamp()); + + int rowCount = syncScanner.nextRows().getNumRows(); + // At this point, the call to the first tablet server should have been + // done already, so the client should have received the propagated timestamp + // in the scanner response. + long asyncTsRef = asyncClient.getLastPropagatedTimestamp(); + long syncTsRef = client.getLastPropagatedTimestamp(); + assertEquals(asyncTsRef, syncTsRef); + assertNotEquals(AsyncKuduClient.NO_TIMESTAMP, asyncTsRef); + assertNotEquals(KuduClient.NO_TIMESTAMP, syncTsRef); + + assertTrue(syncScanner.hasMoreRows()); + while (syncScanner.hasMoreRows()) { + rowCount += syncScanner.nextRows().getNumRows(); + final long asyncTs = asyncClient.getLastPropagatedTimestamp(); + final long syncTs = client.getLastPropagatedTimestamp(); + // Next scan responses from tablet servers should move the propagated + // timestamp further. + assertEquals(syncTs, asyncTs); + assertTrue(asyncTs > asyncTsRef); + asyncTsRef = asyncTs; + } + assertNotEquals(0, rowCount); + } + + @Test(timeout = 100000) + public void testScanTokenPropagatesTimestamp() throws Exception { + // Initially, the client does not have the timestamp set. + assertEquals(AsyncKuduClient.NO_TIMESTAMP, asyncClient.getLastPropagatedTimestamp()); + assertEquals(KuduClient.NO_TIMESTAMP, client.getLastPropagatedTimestamp()); + AsyncKuduScanner scanner = asyncClient.newScannerBuilder(table).build(); + KuduScanner syncScanner = new KuduScanner(scanner); + + // Let the client receive the propagated timestamp in the scanner response. 
+ syncScanner.nextRows().getNumRows(); + final long tsPrev = asyncClient.getLastPropagatedTimestamp(); + final long tsPropagated = tsPrev + 1000000; + + ScanTokenPB.Builder pbBuilder = ScanTokenPB.newBuilder(); + pbBuilder.setTableName(table.getName()); + pbBuilder.setPropagatedTimestamp(tsPropagated); + Client.ScanTokenPB scanTokenPB = pbBuilder.build(); + final byte[] serializedToken = KuduScanToken.serialize(scanTokenPB); + + // Deserialize scan tokens and make sure the client's last propagated + // timestamp is updated accordingly. + assertEquals(tsPrev, asyncClient.getLastPropagatedTimestamp()); + KuduScanToken.deserializeIntoScanner(serializedToken, client); + assertEquals(tsPropagated, asyncClient.getLastPropagatedTimestamp()); + } + + @Test(timeout = 100000) + public void testScanTokenReadMode() throws Exception { + ScanTokenPB.Builder pbBuilder = ScanTokenPB.newBuilder(); + pbBuilder.setTableName(table.getName()); + pbBuilder.setReadMode(Common.ReadMode.READ_YOUR_WRITES); + Client.ScanTokenPB scanTokenPB = pbBuilder.build(); + final byte[] serializedToken = KuduScanToken.serialize(scanTokenPB); + + // Deserialize scan tokens and make sure the read mode is updated accordingly. + KuduScanner scanner = KuduScanToken.deserializeIntoScanner(serializedToken, client); + assertEquals(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES, scanner.getReadMode()); + } + + private AsyncKuduScanner getScanner(String lowerBoundKeyOne, + String lowerBoundKeyTwo, + String exclusiveUpperBoundKeyOne, + String exclusiveUpperBoundKeyTwo) { + return getScanner(lowerBoundKeyOne, lowerBoundKeyTwo, + exclusiveUpperBoundKeyOne, exclusiveUpperBoundKeyTwo, null); + } + + @SuppressWarnings("deprecation") + private AsyncKuduScanner getScanner(String lowerBoundKeyOne, + String lowerBoundKeyTwo, + String exclusiveUpperBoundKeyOne, + String exclusiveUpperBoundKeyTwo, + ColumnRangePredicate predicate) { + AsyncKuduScanner.AsyncKuduScannerBuilder builder = asyncClient.newScannerBuilder(table); + + if (lowerBoundKeyOne != null) { + PartialRow lowerBoundRow = schema.newPartialRow(); + lowerBoundRow.addString(0, lowerBoundKeyOne); + lowerBoundRow.addString(1, lowerBoundKeyTwo); + builder.lowerBound(lowerBoundRow); + } + + if (exclusiveUpperBoundKeyOne != null) { + PartialRow upperBoundRow = schema.newPartialRow(); + upperBoundRow.addString(0, exclusiveUpperBoundKeyOne); + upperBoundRow.addString(1, exclusiveUpperBoundKeyTwo); + builder.exclusiveUpperBound(upperBoundRow); + } + + if (predicate != null) { + builder.addColumnRangePredicate(predicate); + } + + return builder.build(); + } + + private void buildScannerAndCheckColumnsCount(AsyncKuduScanner.AsyncKuduScannerBuilder builder, + int count) throws Exception { + AsyncKuduScanner scanner = builder.build(); + scanner.nextRows().join(DEFAULT_SLEEP); + RowResultIterator rri = scanner.nextRows().join(DEFAULT_SLEEP); + assertEquals(count, rri.next().getSchema().getColumns().size()); + } + + private static Schema getSchema() { + ArrayList columns = new ArrayList<>(3); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key1", STRING) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key2", STRING) + .key(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("val", STRING) + .nullable(true) // Important because we need to make sure it gets passed in projections + .build()); + return new Schema(columns); + } +} diff --git a/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestSecurity.java 
b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestSecurity.java new file mode 100644 index 0000000000..cbe84d4264 --- /dev/null +++ b/java-scala-spark4/kudu-client/src/test/java/org/apache/kudu/client/TestSecurity.java @@ -0,0 +1,588 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.kudu.client; + +import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert; +import static org.apache.kudu.test.ClientTestUtil.getBasicCreateTableOptions; +import static org.apache.kudu.test.ClientTestUtil.getBasicSchema; +import static org.apache.kudu.test.junit.AssertHelpers.assertEventuallyTrue; +import static org.junit.Assert.assertNotNull; + +import java.io.Closeable; +import java.io.IOException; +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import javax.security.auth.Subject; + +import com.google.common.base.Stopwatch; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.stumbleupon.async.Deferred; +import org.hamcrest.CoreMatchers; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.client.Client.AuthenticationCredentialsPB; +import org.apache.kudu.master.Master.ConnectToMasterResponsePB; +import org.apache.kudu.test.CapturingLogAppender; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.cluster.FakeDNS; +import org.apache.kudu.test.cluster.MiniKuduCluster; +import org.apache.kudu.test.cluster.MiniKuduCluster.MiniKuduClusterBuilder; +import org.apache.kudu.test.junit.AssertHelpers; +import org.apache.kudu.test.junit.AssertHelpers.BooleanExpression; +import org.apache.kudu.test.junit.RetryRule; +import org.apache.kudu.util.SecurityUtil; + +public class TestSecurity { + private static final String TABLE_NAME = "TestSecurity-table"; + private static final int TICKET_LIFETIME_SECS = 10; + private static final int RENEWABLE_LIFETIME_SECS = 20; + public static final String CUSTOM_PRINCIPAL = "oryx"; + + @Rule + public KuduTestHarness harness = new KuduTestHarness(); + + private CapturingLogAppender cla; + private MiniKuduCluster miniCluster; + private KuduClient client; + + private enum Option { + LONG_LEADER_ELECTION, + SHORT_TOKENS_AND_TICKETS, + START_TSERVERS, + CUSTOM_PRINCIPAL, + } + + private static class KeyValueMessage { + final String key; + final String val; + final String msg; + + KeyValueMessage(String k, String v, String m) { + key = k; + val = v; + msg = m; + } + } + + private void startCluster(Set

+ * <pre>
+ * {@code
+ *   <property>
+ *     <name>hive.metastore.transactional.event.listeners</name>
+ *     <value>org.apache.kudu.hive.metastore.KuduMetastorePlugin</value>
+ *   </property>
+ * }
+ * </pre>
+ * + * The plugin enforces that managed Kudu table entries in the HMS always contain + * two properties: a Kudu table ID and the Kudu master addresses. It also + * enforces that non-Kudu tables do not have these properties (except cases + * when upgrading tables with legacy Kudu storage handler to be Kudu tables + * or downgrading from the other way around). The plugin considers entries + * to be Kudu tables if they contain the Kudu storage handler. + * + * Additionally, the plugin checks that when particular events have an + * environment containing a Kudu table ID, that event only applies + * to the specified Kudu table. This provides some amount of concurrency + * safety, so that the Kudu Master can ensure it is operating on the correct + * table entry. + * + * Note that such validation does not apply to tables with legacy Kudu + * storage handler and will be skipped if system env KUDU_SKIP_HMS_PLUGIN_VALIDATION + * is set to non-zero. + */ +public class KuduMetastorePlugin extends MetaStoreEventListener { + + @VisibleForTesting + static final String KUDU_STORAGE_HANDLER = "org.apache.hadoop.hive.kudu.KuduStorageHandler"; + @VisibleForTesting + static final String LEGACY_KUDU_STORAGE_HANDLER = "com.cloudera.kudu.hive.KuduStorageHandler"; + @VisibleForTesting + static final String KUDU_CLUSTER_ID_KEY = "kudu.cluster_id"; + @VisibleForTesting + static final String KUDU_TABLE_ID_KEY = "kudu.table_id"; + @VisibleForTesting + static final String KUDU_TABLE_NAME_KEY = "kudu.table_name"; + @VisibleForTesting + static final String KUDU_MASTER_ADDRS_KEY = "kudu.master_addresses"; + @VisibleForTesting + static final String KUDU_MASTER_EVENT_KEY = "kudu.master_event"; + @VisibleForTesting + static final String KUDU_CHECK_ID_KEY = "kudu.check_id"; + // The key should keep in sync with the one used in + // org.apache.hadoop.hive.metastore.MetaStoreUtils.isExternalTable(). + @VisibleForTesting + static final String EXTERNAL_TABLE_KEY = "EXTERNAL"; + + static final String EXTERNAL_PURGE_KEY = "external.table.purge"; + + static final String COMMENT_KEY = "comment"; + + // System env to track if the HMS plugin validation should be skipped. + static final String SKIP_VALIDATION_ENV = "KUDU_SKIP_HMS_PLUGIN_VALIDATION"; + + // System env to force sync enabled/disabled without a call to the master. + // This is useful for testing and could be useful as an escape hatch if there + // are too many requests to the master. + static final String SYNC_ENABLED_ENV = "KUDU_HMS_SYNC_ENABLED"; + + // System env to set a custom sasl protocol name for the Kudu client. + // TODO(ghenke): Use a Hive config parameter from the KuduStorageHandler instead. + static final String SASL_PROTOCOL_NAME_ENV = "KUDU_SASL_PROTOCOL_NAME"; + + // Maps lists of master addresses to KuduClients to cache clients. + private static final Map KUDU_CLIENTS = + new ConcurrentHashMap(); + + public KuduMetastorePlugin(Configuration config) { + super(config); + } + + @Override + public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { + super.onCreateTable(tableEvent); + + if (skipsValidation()) { + return; + } + + Table table = tableEvent.getTable(); + + // Only validate synchronized tables. + if (!isSynchronizedTable(table)) { + return; + } + + // Allow non-Kudu tables to be created. + if (!isKuduTable(table)) { + // But ensure that the new table does not contain Kudu-specific properties. 
+ checkNoKuduProperties(table); + return; + } + + // In the case of table creation all Kudu tables must have + // the master addresses property. We explicitly check for it here + // so that `kuduSyncEnabled` below doesn't return false for Kudu + // tables that are missing the master addresses property. + checkMasterAddrsProperty(table); + + // Only validate tables for clusters with HMS sync enabled. + if (!kuduSyncEnabled(tableEvent, table)) { + return; + } + + checkKuduProperties(table); + + if (!isKuduMasterAction(tableEvent)) { + throw new MetaException("Kudu tables may not be created through Hive"); + } + } + + @Override + public void onDropTable(DropTableEvent tableEvent) throws MetaException { + super.onDropTable(tableEvent); + + if (skipsValidation()) { + return; + } + + Table table = tableEvent.getTable(); + + // Only validate synchronized tables. + if (!isSynchronizedTable(table)) { + return; + } + + EnvironmentContext environmentContext = tableEvent.getEnvironmentContext(); + String targetTableId = environmentContext == null ? null : + environmentContext.getProperties().get(KUDU_TABLE_ID_KEY); + + // Allow non-Kudu tables to be dropped. + if (!isKuduTable(table)) { + // However, make sure it doesn't have a table id from the context. + // The table id is only meant for Kudu tables and set by the Kudu master. + if (targetTableId != null) { + throw new MetaException("Kudu table ID does not match the non-Kudu HMS entry"); + } + return; + } + + // Only validate tables for clusters with HMS sync enabled. + if (!kuduSyncEnabled(tableEvent, table)) { + return; + } + + // If this request doesn't specify a Kudu table ID then allow it to proceed. + // Drop table requests that don't come from the Kudu master may not set the table ID, + // e.g. when dropping tables via Hive, or dropping orphaned HMS entries. + // Such tables are dropped in Kudu by name via the notification listener. + if (targetTableId == null) { + return; + } + + // The kudu.master_event property isn't checked, because the kudu.table_id + // property already implies this event is coming from a Kudu Master. + + // Check that the table's ID matches the request's table ID. + if (!targetTableId.equals(table.getParameters().get(KUDU_TABLE_ID_KEY))) { + throw new MetaException("Kudu table ID does not match the HMS entry"); + } + } + + @Override + public void onAlterTable(AlterTableEvent tableEvent) throws MetaException { + super.onAlterTable(tableEvent); + + if (skipsValidation()) { + return; + } + + Table oldTable = tableEvent.getOldTable(); + Table newTable = tableEvent.getNewTable(); + + // Allow non-Kudu tables to be altered. + if (!isKuduTable(oldTable) && !isLegacyKuduTable(oldTable)) { + // Allow non-Kudu tables to be altered without introducing Kudu-specific + // properties. + checkNoKuduProperties(newTable); + return; + } + + // Check if the alter changes any of the Kudu metadata. + // If not, we can skip checking for synchronization given Kudu doesn't care about the changes. + // We primarily expect this case to occur when a table is migrated from a managed table + // to an external table with the purge property. This change is effectively a no-op to Kudu. + if (kuduMetadataUnchanged(oldTable, newTable)) { + return; + } + + // Only validate tables for clusters with HMS sync enabled. 
+ if (!kuduSyncEnabled(tableEvent, oldTable)) { + return; + } + + // Prevent altering the table type (managed/external) of Kudu tables (or via + // altering table properties 'EXTERNAL' or `external.table.purge`) in a way + // that changes if a table is synchronized. This can cause orphaned tables. + // Note: This doesn't prevent altering the table type for legacy tables + // because they should continue to work as they always have primarily for + // migration purposes. + // The Kudu master is allowed to make these changes if necessary as it is a trusted user. + if (isKuduTable(oldTable) && + !isKuduMasterAction(tableEvent) && + isSynchronizedTable(oldTable) != isSynchronizedTable(newTable) ) { + throw new MetaException("Kudu table type may not be altered"); + } + + // Only validate synchronized tables. + if (!isSynchronizedTable(oldTable)) { + return; + } + + if (isLegacyKuduTable(oldTable)) { + if (isKuduTable(newTable)) { + // Allow legacy tables to be upgraded to Kudu tables. Validate the upgraded + // table entry contains the required Kudu table properties, and that any + // potential schema alterations are coming from the Kudu master. + checkKuduProperties(newTable); + checkOnlyKuduMasterCanAlterSchema(tableEvent, oldTable, newTable); + return; + } + // Allow legacy tables to be altered without introducing Kudu-specific + // properties. + checkNoKuduProperties(newTable); + } else if (isKuduTable(oldTable)) { + if (isLegacyKuduTable(newTable)) { + // Allow Kudu tables to be downgraded to legacy tables. Validate the downgraded + // table entry does not contain Kudu-specific properties, and that any potential + // schema alterations are coming from the Kudu master. + checkNoKuduProperties(newTable); + checkOnlyKuduMasterCanAlterSchema(tableEvent, oldTable, newTable); + return; + } + // Validate the new table entry contains the required Kudu table properties, and + // that any potential schema alterations are coming from the Kudu master. + checkKuduProperties(newTable); + checkOnlyKuduMasterCanAlterSchema(tableEvent, oldTable, newTable); + // Check that the Kudu table ID isn't changing. + + if (checkTableID(tableEvent)) { + String oldTableId = oldTable.getParameters().get(KUDU_TABLE_ID_KEY); + String newTableId = newTable.getParameters().get(KUDU_TABLE_ID_KEY); + if (!newTableId.equals(oldTableId)) { + throw new MetaException("Kudu table ID does not match the existing HMS entry"); + } + } + } + } + + /** + * Checks whether the table is a Kudu table. + * @param table the table to check + * @return {@code true} if the table is a Kudu table, otherwise {@code false} + */ + private static boolean isKuduTable(Table table) { + String storageHandler = table.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE); + return KUDU_STORAGE_HANDLER.equals(storageHandler); + } + + /** + * Checks whether the table is a Kudu table with legacy Kudu + * storage handler. + * + * @param table the table to check + * @return {@code true} if the table is a legacy Kudu table, + * otherwise {@code false} + */ + private static boolean isLegacyKuduTable(Table table) { + return LEGACY_KUDU_STORAGE_HANDLER.equals(table.getParameters() + .get(hive_metastoreConstants.META_TABLE_STORAGE)); + } + + /** + * Checks whether the table is an external table. 
+ * + * @param table the table to check + * @return {@code true} if the table is an external table, + * otherwise {@code false} + */ + private static boolean isExternalTable(Table table) { + String isExternal = table.getParameters().get(EXTERNAL_TABLE_KEY); + if (isExternal == null) { + return false; + } + return Boolean.parseBoolean(isExternal); + } + + /** + * Checks whether the table should be purged when deleted, i.e. the + * underlying Kudu table should be deleted when the HMS table entry is + * deleted. + * + * @param table the table to check + * @return {@code true} if the table is a managed table or has external.table.purge = true, + * otherwise {@code false} + */ + private static boolean isPurgeTable(Table table) { + boolean externalPurge = + Boolean.parseBoolean(table.getParameters().getOrDefault(EXTERNAL_PURGE_KEY, "false")); + return TableType.MANAGED_TABLE.name().equals(table.getTableType()) || externalPurge; + } + + /** + * Checks whether the table is considered a synchronized Kudu table. + * + * @param table the table to check + * @return {@code true} if the table is a managed table or an external table with + * `external.table.purge = true`, otherwise {@code false} + */ + private static boolean isSynchronizedTable(Table table) { + return TableType.MANAGED_TABLE.name().equals(table.getTableType()) || + (isExternalTable(table) && isPurgeTable(table)); + } + + /** + * Checks that the Kudu table entry contains the required Kudu table properties. + * @param table the table to check + */ + private static void checkKuduProperties(Table table) throws MetaException { + if (!isKuduTable(table)) { + throw new MetaException(String.format( + "Kudu table entry must contain a Kudu storage handler property (%s=%s)", + hive_metastoreConstants.META_TABLE_STORAGE, + KUDU_STORAGE_HANDLER)); + } + String tableId = table.getParameters().get(KUDU_TABLE_ID_KEY); + if (tableId == null || tableId.isEmpty()) { + throw new MetaException(String.format( + "Kudu table entry must contain a table ID property (%s)", KUDU_TABLE_ID_KEY)); + } + checkMasterAddrsProperty(table); + } + + /** + * Checks that the Kudu table entry contains the `kudu.master_addresses` property. + * @param table the table to check + */ + private static void checkMasterAddrsProperty(Table table) throws MetaException { + String masterAddresses = table.getParameters().get(KUDU_MASTER_ADDRS_KEY); + if (masterAddresses == null || masterAddresses.isEmpty()) { + throw new MetaException(String.format( + "Kudu table entry must contain a Master addresses property (%s)", KUDU_MASTER_ADDRS_KEY)); + } + } + + /** + * Checks that the non-Kudu table entry does not contain Kudu-specific table properties. + * @param table the table to check + */ + private static void checkNoKuduProperties(Table table) throws MetaException { + if (isKuduTable(table)) { + throw new MetaException(String.format( + "non-Kudu table entry must not contain the Kudu storage handler (%s=%s)", + hive_metastoreConstants.META_TABLE_STORAGE, + KUDU_STORAGE_HANDLER)); + } + if (table.getParameters().containsKey(KUDU_TABLE_ID_KEY)) { + throw new MetaException(String.format( + "non-Kudu table entry must not contain a table ID property (%s)", + KUDU_TABLE_ID_KEY)); + } + if (table.getParameters().containsKey(KUDU_CLUSTER_ID_KEY)) { + throw new MetaException(String.format( + "non-Kudu table entry must not contain a cluster ID property (%s)", + KUDU_CLUSTER_ID_KEY)); + } + } + + /** + * Checks that the metadata relevant to Kudu is unchanged between the before and after table. 
+ * See HmsCatalog::PopulateTable in hms_catalog.cc for a reference to the relevant metadata. + * + * @param before the table to be altered + * @param after the new altered table + * @return true if no Kudu relevant metadata has changed + */ + @VisibleForTesting + static boolean kuduMetadataUnchanged(Table before, Table after) { + // If any of the Kudu table properties have changed, return false. + Map beforeParams = before.getParameters(); + Map afterParams = after.getParameters(); + if (!Objects.equals(beforeParams.get(hive_metastoreConstants.META_TABLE_STORAGE), + afterParams.get(hive_metastoreConstants.META_TABLE_STORAGE)) || + !Objects.equals(beforeParams.get(KUDU_MASTER_ADDRS_KEY), + afterParams.get(KUDU_MASTER_ADDRS_KEY)) || + !Objects.equals(beforeParams.get(KUDU_TABLE_ID_KEY), + afterParams.get(KUDU_TABLE_ID_KEY)) || + !Objects.equals(beforeParams.get(KUDU_TABLE_NAME_KEY), + afterParams.get(KUDU_TABLE_NAME_KEY)) || + !Objects.equals(beforeParams.get(KUDU_CLUSTER_ID_KEY), + afterParams.get(KUDU_CLUSTER_ID_KEY))) { + return false; + } + + // If the table synchronization has changed, return false. + // Kudu doesn't care if the table is managed vs external with the purge property set + // to true, it just cares that he table is synchronized. + if (isSynchronizedTable(before) != isSynchronizedTable(after)) { + return false; + } + + // If the table database, name, owner, or comment have changed, return false. + if (!Objects.equals(before.getDbName(), after.getDbName()) || + !Objects.equals(before.getTableName(), after.getTableName()) || + !Objects.equals(before.getOwner(), after.getOwner()) || + !Objects.equals(beforeParams.get(COMMENT_KEY), + afterParams.get(COMMENT_KEY))) { + return false; + } + + // If the column count has changed, return false. + List beforeCols = before.getSd().getCols(); + List afterCols = after.getSd().getCols(); + if (beforeCols.size() != afterCols.size()) { + return false; + } + + // If any of the columns have changed (name, type, or comment), return false. + // We don't have the Kudu internal column ID, so we assume the column index + // in both tables aligns if there are no changes. + for (int i = 0; i < beforeCols.size(); i++) { + FieldSchema beforeCol = beforeCols.get(i); + FieldSchema afterCol = afterCols.get(i); + if (!Objects.equals(beforeCol.getName(), afterCol.getName()) || + !Objects.equals(beforeCol.getType(), afterCol.getType()) || + !Objects.equals(beforeCol.getComment(), afterCol.getComment())) { + return false; + } + } + + // Kudu doesn't have metadata related to all other changes. + return true; + } + + /** + * Checks that the table schema can only be altered by an action from the Kudu Master. + * @param tableEvent + * @param oldTable the table to be altered + * @param newTable the new altered table + */ + private static void checkOnlyKuduMasterCanAlterSchema(AlterTableEvent tableEvent, + Table oldTable, Table newTable) throws MetaException { + if (!isKuduMasterAction(tableEvent) && + !oldTable.getSd().getCols().equals(newTable.getSd().getCols())) { + throw new MetaException("Kudu table columns may not be altered through Hive"); + } + } + + /** + * Returns true if the event is from the Kudu Master. 
+ */ + private static boolean isKuduMasterAction(ListenerEvent event) { + EnvironmentContext environmentContext = event.getEnvironmentContext(); + if (environmentContext == null) { + return false; + } + + Map properties = environmentContext.getProperties(); + if (properties == null) { + return false; + } + + if (!properties.containsKey(KUDU_MASTER_EVENT_KEY)) { + return false; + } + + return Boolean.parseBoolean(properties.get(KUDU_MASTER_EVENT_KEY)); + } + + /** + * Returns true if the table ID should be verified on an event. + * Defaults to true. + */ + private static boolean checkTableID(ListenerEvent event) { + EnvironmentContext environmentContext = event.getEnvironmentContext(); + if (environmentContext == null) { + return true; + } + + Map properties = environmentContext.getProperties(); + if (properties == null) { + return true; + } + + if (!properties.containsKey(KUDU_CHECK_ID_KEY)) { + return true; + } + + return Boolean.parseBoolean(properties.get(KUDU_CHECK_ID_KEY)); + } + + /** + * Returns true if the system env is set to skip validation. + */ + private static boolean skipsValidation() { + String skipValidation = System.getenv(SKIP_VALIDATION_ENV); + if (skipValidation == null || skipValidation.isEmpty() || + Integer.parseInt(skipValidation) == 0) { + return false; + } + return true; + } + + /** + * Returns true if HMS synchronization is configured on the Kudu cluster + * backing the HMS table. + */ + private static boolean kuduSyncEnabled(ListenerEvent event, Table table) throws MetaException { + // If SYNC_ENABLED_ENV is set, use it instead of contacting the Kudu master. + String envEnabled = System.getenv(SYNC_ENABLED_ENV); + if (envEnabled != null && !envEnabled.isEmpty()) { + return Integer.parseInt(envEnabled) == 1; + } + + // If the request is from the Kudu Master, we know HMS sync is enabled + // and can avoid another request. + if (isKuduMasterAction(event)) { + return true; + } + + String masterAddresses = table.getParameters().get(KUDU_MASTER_ADDRS_KEY); + if (masterAddresses == null || masterAddresses.isEmpty()) { + // A table without master addresses is not synchronized, + // it may not even be a Kudu table. + return false; + } + + KuduClient kuduClient = getKuduClient(masterAddresses); + HiveMetastoreConfig hmsConfig; + try { + hmsConfig = kuduClient.getHiveMetastoreConfig(); + } catch (KuduException e) { + throw new MetaException( + String.format("Error determining if Kudu's integration with " + + "the Hive Metastore is enabled: %s", e.getMessage())); + } + + // If the HiveMetastoreConfig is not null, then the HMS synchronization + // is enabled in the Kudu cluster. 
+ return hmsConfig != null; + } + + private static KuduClient getKuduClient(String kuduMasters) { + KuduClient client = KUDU_CLIENTS.get(kuduMasters); + if (client == null) { + try { + client = UserGroupInformation.getLoginUser().doAs( + (PrivilegedExceptionAction) () -> + new KuduClient.KuduClientBuilder(kuduMasters) + .saslProtocolName(getSaslProtocolName()) + .build() + ); + } catch (IOException | InterruptedException e) { + throw new RuntimeException("Failed to create the Kudu client"); + } + KUDU_CLIENTS.put(kuduMasters, client); + } + return client; + } + + private static String getSaslProtocolName() { + String saslProtocolName = System.getenv(SASL_PROTOCOL_NAME_ENV); + if (saslProtocolName == null || saslProtocolName.isEmpty()) { + saslProtocolName = "kudu"; + } + return saslProtocolName; + } +} diff --git a/java-scala-spark4/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java b/java-scala-spark4/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java new file mode 100644 index 0000000000..589637adb8 --- /dev/null +++ b/java-scala-spark4/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java @@ -0,0 +1,730 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.hive.metastore; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.UUID; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreEventListener; +import org.apache.hadoop.hive.metastore.PartitionExpressionProxy; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.thrift.TException; +import org.junit.After; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.test.cluster.MiniKuduCluster; + +public class TestKuduMetastorePlugin { + private static final Logger LOG = LoggerFactory.getLogger(TestKuduMetastorePlugin.class); + + private HiveConf clientConf; + private HiveMetaStoreClient client; + private MiniKuduCluster miniCluster; + + private EnvironmentContext masterContext() { + return new EnvironmentContext( + ImmutableMap.of(KuduMetastorePlugin.KUDU_MASTER_EVENT_KEY, "true")); + } + + public void startCluster(boolean syncEnabled) throws Exception { + Configuration hmsConf = MetastoreConf.newMetastoreConf(); + + // Avoids a dependency on the default partition expression class, which is + // contained in the hive-exec jar. + hmsConf.setClass(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(), + DefaultPartitionExpressionProxy.class, + PartitionExpressionProxy.class); + + // Add the KuduMetastorePlugin. + hmsConf.setClass(MetastoreConf.ConfVars.TRANSACTIONAL_EVENT_LISTENERS.getVarname(), + KuduMetastorePlugin.class, + MetaStoreEventListener.class); + + // Auto create necessary schema on a startup if one doesn't exist. + MetastoreConf.setBoolVar(hmsConf, MetastoreConf.ConfVars.AUTO_CREATE_ALL, true); + MetastoreConf.setBoolVar(hmsConf, MetastoreConf.ConfVars.SCHEMA_VERIFICATION, false); + + // Configure a temporary test state directory. + Path hiveTestDir = Files.createTempDirectory("hive"); + hiveTestDir.toFile().deleteOnExit(); // Ensure we cleanup state. + LOG.info("Using temporary test state directory:" + hiveTestDir); + + // Set the warehouse directory. + Path warehouseDir = hiveTestDir.resolve("warehouse"); + MetastoreConf.setVar(hmsConf, MetastoreConf.ConfVars.WAREHOUSE, warehouseDir.toString()); + // For some reason the maven tests fallback to the default warehouse directory + // and fail without this system property. However, the Gradle tests don't need it. + System.setProperty(MetastoreConf.ConfVars.WAREHOUSE.getVarname(), warehouseDir.toString()); + + Path warehouseExternalDir = hiveTestDir.resolve("external-warehouse"); + // NOTE: We use the string value for backwards compatibility. 
+ MetastoreConf.setVar(hmsConf, MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL, + warehouseExternalDir.toString()); + System.setProperty(MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL.getVarname(), + warehouseExternalDir.toString()); + + // Set the metastore connection url. + Path metadb = hiveTestDir.resolve("metadb"); + MetastoreConf.setVar(hmsConf, MetastoreConf.ConfVars.CONNECT_URL_KEY, + "jdbc:derby:memory:" + metadb.toString() + ";create=true"); + // Set the derby log file. + Path derbyLogFile = hiveTestDir.resolve("derby.log"); + assertTrue(derbyLogFile.toFile().createNewFile()); + System.setProperty("derby.stream.error.file", derbyLogFile.toString()); + + int msPort = MetaStoreUtils.startMetaStore(hmsConf); + + clientConf = new HiveConf(); + clientConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); + + client = new HiveMetaStoreClient(clientConf); + + MiniKuduCluster.MiniKuduClusterBuilder mcb = new MiniKuduCluster.MiniKuduClusterBuilder(); + if (syncEnabled) { + mcb.addMasterServerFlag("--hive_metastore_uris=thrift://localhost:" + msPort); + } + miniCluster = mcb.numMasterServers(3) + .numTabletServers(0) + .build(); + } + + @After + public void tearDown() { + try { + if (client != null) { + client.close(); + } + } finally { + if (miniCluster != null) { + miniCluster.shutdown(); + } + } + } + + /** + * @return a Kudu table descriptor given the storage handler type. + */ + private Table newKuduTable(String name, String storageHandler) { + Table table = new Table(); + table.setDbName("default"); + table.setTableName(name); + table.setTableType(TableType.EXTERNAL_TABLE.toString()); + table.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE"); + table.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "TRUE"); + table.putToParameters(hive_metastoreConstants.META_TABLE_STORAGE, + storageHandler); + if (!storageHandler.equals(KuduMetastorePlugin.LEGACY_KUDU_STORAGE_HANDLER)) { + table.putToParameters(KuduMetastorePlugin.KUDU_TABLE_ID_KEY, + UUID.randomUUID().toString()); + table.putToParameters(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY, + miniCluster.getMasterAddressesAsString()); + } + + // The HMS will NPE if the storage descriptor and partition keys aren't set... + StorageDescriptor sd = new StorageDescriptor(); + sd.addToCols(new FieldSchema("a", "bigint", "")); + sd.setSerdeInfo(new SerDeInfo()); + // Unset the location to ensure the default location defined by hive will be used. + sd.unsetLocation(); + table.setSd(sd); + table.setPartitionKeys(Lists.newArrayList()); + + return table; + } + + /** + * @return a legacy Kudu table descriptor. + */ + private Table newLegacyTable(String name) { + return newKuduTable(name, KuduMetastorePlugin.LEGACY_KUDU_STORAGE_HANDLER); + } + + /** + * @return a valid Kudu table descriptor. + */ + private Table newTable(String name) { + return newKuduTable(name, KuduMetastorePlugin.KUDU_STORAGE_HANDLER); + } + + @Test + public void testCreateTableHandler() throws Exception { + startCluster(/* syncEnabled */ true); + // A non-Kudu table with a Kudu table ID should be rejected. + try { + Table table = newTable("table"); + table.getParameters().remove(hive_metastoreConstants.META_TABLE_STORAGE); + table.getParameters().remove(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY); + client.createTable(table); + fail(); + } catch (TException e) { + assertTrue( + e.getMessage(), + e.getMessage().contains( + "non-Kudu table entry must not contain a table ID property")); + } + + // A Kudu table without a Kudu table ID. 
+ try { + Table table = newTable("table"); + table.getParameters().remove(KuduMetastorePlugin.KUDU_TABLE_ID_KEY); + client.createTable(table, masterContext()); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu table entry must contain a table ID property")); + } + + // A Kudu table without master context. + try { + Table table = newTable("table"); + client.createTable(table); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu tables may not be created through Hive")); + } + + // A Kudu table without a master address. + try { + Table table = newTable("table"); + table.getParameters().remove(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY); + client.createTable(table, masterContext()); + fail(); + } catch (TException e) { + assertTrue( + e.getMessage(), + e.getMessage().contains( + "Kudu table entry must contain a Master addresses property")); + } + + // Check that creating a valid table is accepted. + { + Table table = newTable("table"); + client.createTable(table, masterContext()); + client.dropTable(table.getDbName(), table.getTableName()); + } + + // Check that creating an unsynchronized table is accepted. + { + Table table = newTable("table"); + table.setTableType(TableType.EXTERNAL_TABLE.toString()); + table.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE"); + table.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE"); + client.createTable(table); + client.dropTable(table.getDbName(), table.getTableName()); + } + } + + @Test + public void testAlterTableHandler() throws Exception { + startCluster(/* syncEnabled */ true); + // Test altering a Kudu (or a legacy) table. + Table initTable = newTable("table"); + client.createTable(initTable, masterContext()); + // Get the table from the HMS in case any translation occurred. + Table table = client.getTable(initTable.getDbName(), initTable.getTableName()); + Table legacyTable = newLegacyTable("legacy_table"); + client.createTable(legacyTable, masterContext()); + // Get the table from the HMS in case any translation occurred. + legacyTable = client.getTable(legacyTable.getDbName(), legacyTable.getTableName()); + try { + // Check that altering the table succeeds. + client.alter_table(table.getDbName(), table.getTableName(), table); + + // Try to alter the Kudu table with a different table ID. + Table newTable = table.deepCopy(); + newTable.putToParameters(KuduMetastorePlugin.KUDU_TABLE_ID_KEY, + UUID.randomUUID().toString()); + try { + client.alter_table(table.getDbName(), table.getTableName(), newTable); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu table ID does not match the existing HMS entry")); + } + + // Check that altering the Kudu table with a different table ID while + // setting kudu.check_id to false succeeds. + EnvironmentContext noCheckIdCtx = new EnvironmentContext( + ImmutableMap.of(KuduMetastorePlugin.KUDU_CHECK_ID_KEY, "false")); + client.alter_table_with_environmentContext(table.getDbName(), table.getTableName(), + newTable, noCheckIdCtx); + // Alter back for more testing below. + client.alter_table_with_environmentContext(table.getDbName(), table.getTableName(), table, + noCheckIdCtx); + + // Try to alter the Kudu table with no storage handler. 
+ try { + Table alteredTable = table.deepCopy(); + alteredTable.getParameters().remove(hive_metastoreConstants.META_TABLE_STORAGE); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + fail(); + } catch (TException e) { + assertTrue( + e.getMessage(), + e.getMessage().contains( + "Kudu table entry must contain a Kudu storage handler property")); + } + + // Alter the Kudu table to a different type by setting the external property fails. + try { + Table alteredTable = table.deepCopy(); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "FALSE"); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu table type may not be altered")); + } + + // Alter the Kudu table to the same type by setting the table property works. + { + Table alteredTable = table.deepCopy(); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE"); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + } + + // Alter the Kudu table to a managed type with the master context succeeds. + { + Table alteredTable = table.deepCopy(); + alteredTable.setTableType(TableType.MANAGED_TABLE.toString()); + // Also change the location to avoid MetastoreDefaultTransformer validation + // that exists in some Hive versions. + alteredTable.getSd().setLocation(String.format("%s/%s/%s", + clientConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname), + table.getDbName(), table.getTableName())); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "FALSE"); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE"); + client.alter_table_with_environmentContext(table.getDbName(), table.getTableName(), + alteredTable, masterContext()); + } + + // Alter the Kudu table to a different type by setting the purge property fails. + try { + Table alteredTable = table.deepCopy(); + // Also change the location to avoid MetastoreDefaultTransformer validation + // that exists in some Hive versions. + alteredTable.getSd().setLocation(String.format("%s/%s/%s", + clientConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname), + table.getDbName(), table.getTableName())); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE"); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu table type may not be altered")); + } + + // Alter the Kudu table to an external type with the master context succeeds. + { + Table alteredTable = table.deepCopy(); + // Also change the location to avoid MetastoreDefaultTransformer validation + // that exists in some Hive versions. + alteredTable.getSd().setLocation(String.format("%s/%s/%s", + clientConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname), + table.getDbName(), table.getTableName())); + alteredTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE"); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "TRUE"); + client.alter_table_with_environmentContext(table.getDbName(), table.getTableName(), + alteredTable, masterContext()); + } + + // Altering the table type in a what that maintains sync works. + // In this case an external purge table is the same as a managed table. 
+ { + Table alteredTable = table.deepCopy(); + alteredTable.setTableType(TableType.MANAGED_TABLE.toString()); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "FALSE"); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE"); + // Also change the location to avoid MetastoreDefaultTransformer validation + // that exists in some Hive versions. + alteredTable.getSd().setLocation(String.format("%s/%s/%s", + clientConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname), + table.getDbName(), table.getTableName())); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + } + + // Altering back the table type in a what that maintains sync works. + // In this case a managed table is the same as an external purge table. + { + Table alteredTable = table.deepCopy(); + alteredTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE"); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "TRUE"); + // Also change the location to avoid MetastoreDefaultTransformer validation + // that exists in some Hive versions. + alteredTable.getSd().setLocation(String.format("%s/%s/%s", + clientConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname), + table.getDbName(), table.getTableName())); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + } + + // Check that adding a column fails. + table.getSd().addToCols(new FieldSchema("b", "int", "")); + // Also change the location to avoid MetastoreDefaultTransformer validation + // that exists in some Hive versions. + table.getSd().setLocation(String.format("%s/%s/%s", + clientConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname), + table.getDbName(), table.getTableName())); + try { + client.alter_table(table.getDbName(), table.getTableName(), table); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu table columns may not be altered through Hive")); + } + + // Check that adding a column succeeds with the master event property set. + client.alter_table_with_environmentContext( + table.getDbName(), table.getTableName(), table, new EnvironmentContext( + ImmutableMap.of(KuduMetastorePlugin.KUDU_MASTER_EVENT_KEY, "true"))); + + // Check that altering a table property unrelated to Kudu succeeds. + { + Table alteredTable = table.deepCopy(); + alteredTable.putToParameters("some.random.property", "foo"); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + } + + // Check that altering table with Kudu storage handler to legacy format + // succeeds. + { + Table alteredTable = table.deepCopy(); + alteredTable.getParameters().clear(); + alteredTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE"); + alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "TRUE"); + alteredTable.putToParameters(hive_metastoreConstants.META_TABLE_STORAGE, + KuduMetastorePlugin.LEGACY_KUDU_STORAGE_HANDLER); + alteredTable.putToParameters(KuduMetastorePlugin.KUDU_TABLE_NAME_KEY, + "legacy_table"); + alteredTable.putToParameters(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY, + miniCluster.getMasterAddressesAsString()); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + } + } finally { + client.dropTable(table.getDbName(), table.getTableName()); + } + + // Test altering a non-Kudu table. 
+ { + table = initTable.deepCopy(); + table.getParameters().clear(); + client.createTable(table); + table = client.getTable(table.getDbName(), table.getTableName()); + try { + + // Try to alter the table and add a Kudu table ID. + try { + Table alteredTable = table.deepCopy(); + alteredTable.putToParameters(KuduMetastorePlugin.KUDU_TABLE_ID_KEY, + UUID.randomUUID().toString()); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains( + "non-Kudu table entry must not contain a table ID property")); + } + + // Try to alter the table and set a Kudu storage handler. + try { + Table alteredTable = table.deepCopy(); + alteredTable.putToParameters(hive_metastoreConstants.META_TABLE_STORAGE, + KuduMetastorePlugin.KUDU_STORAGE_HANDLER); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + fail(); + } catch (TException e) { + assertTrue( + e.getMessage(), + e.getMessage().contains( + "non-Kudu table entry must not contain the Kudu storage handler")); + } + + // Check that altering the table succeeds. + client.alter_table(table.getDbName(), table.getTableName(), table); + + // Check that altering the legacy table to use the Kudu storage handler + // succeeds. + { + Table alteredTable = legacyTable.deepCopy(); + alteredTable.putToParameters(hive_metastoreConstants.META_TABLE_STORAGE, + KuduMetastorePlugin.KUDU_STORAGE_HANDLER); + alteredTable.putToParameters(KuduMetastorePlugin.KUDU_TABLE_ID_KEY, + UUID.randomUUID().toString()); + alteredTable.putToParameters(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY, + miniCluster.getMasterAddressesAsString()); + client.alter_table(legacyTable.getDbName(), legacyTable.getTableName(), + alteredTable); + } + } finally { + client.dropTable(table.getDbName(), table.getTableName()); + } + } + + // Test altering an unsynchronized table is accepted. + { + table = initTable.deepCopy(); + table.setTableType(TableType.EXTERNAL_TABLE.name()); + table.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE"); + table.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE"); + client.createTable(table); + table = client.getTable(table.getDbName(), table.getTableName()); + try { + client.alter_table(table.getDbName(), table.getTableName(), table); + } finally { + client.dropTable(table.getDbName(), table.getTableName()); + } + } + } + + @Test + public void testLegacyTableHandler() throws Exception { + startCluster(/* syncEnabled */ true); + // Test creating a legacy Kudu table without context succeeds. + Table table = newLegacyTable("legacy_table"); + client.createTable(table); + // Get the table from the HMS in case any translation occurred. + table = client.getTable(table.getDbName(), table.getTableName()); + + // Check that altering legacy table's schema succeeds. + { + Table alteredTable = table.deepCopy(); + alteredTable.getSd().addToCols(new FieldSchema("c", "int", "")); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + } + + // Check that renaming legacy table's schema succeeds. + final String newTable = "new_table"; + { + Table alteredTable = table.deepCopy(); + alteredTable.setTableName(newTable); + client.alter_table(table.getDbName(), table.getTableName(), alteredTable); + } + // Test dropping a legacy Kudu table without context succeeds. 
+ client.dropTable(table.getDbName(), newTable); + } + + @Test + public void testDropTableHandler() throws Exception { + startCluster(/* syncEnabled */ true); + // Test dropping a Kudu table. + Table table = newTable("table"); + client.createTable(table, masterContext()); + try { + + // Test with an invalid table ID. + try { + EnvironmentContext envContext = new EnvironmentContext(); + envContext.putToProperties(KuduMetastorePlugin.KUDU_TABLE_ID_KEY, + UUID.randomUUID().toString()); + client.dropTable(table.getCatName(), table.getDbName(), table.getTableName(), + /* delete data */ true, + /* ignore unknown */ false, + envContext); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu table ID does not match the HMS entry")); + } + } finally { + // Dropping a Kudu table without context should succeed. + client.dropTable(table.getDbName(), table.getTableName()); + } + + // Test dropping a Kudu table with the correct ID. + client.createTable(table, masterContext()); + EnvironmentContext envContext = new EnvironmentContext(); + envContext.putToProperties(KuduMetastorePlugin.KUDU_TABLE_ID_KEY, + table.getParameters().get(KuduMetastorePlugin.KUDU_TABLE_ID_KEY)); + client.dropTable(table.getCatName(), table.getDbName(), table.getTableName(), + /* delete data */ true, + /* ignore unknown */ false, + envContext); + + // Test dropping a non-Kudu table with a Kudu table ID. + { + table.getParameters().clear(); + client.createTable(table); + try { + client.dropTable(table.getCatName(), table.getDbName(), table.getTableName(), + /* delete data */ true, + /* ignore unknown */ false, + envContext); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu table ID does not match the non-Kudu HMS entry")); + } finally { + client.dropTable(table.getDbName(), table.getTableName()); + } + } + + // Test dropping a non-Kudu table. + { + table.getParameters().clear(); + client.createTable(table); + try { + client.dropTable(table.getCatName(), table.getDbName(), table.getTableName(), + /* delete data */ true, + /* ignore unknown */ false, + envContext); + fail(); + } catch (TException e) { + assertTrue(e.getMessage(), + e.getMessage().contains("Kudu table ID does not match the non-Kudu HMS entry")); + } finally { + client.dropTable(table.getDbName(), table.getTableName()); + } + } + + // Test dropping an unsynchronized table is accepted. + { + table.getParameters().clear(); + table.setTableType(TableType.EXTERNAL_TABLE.name()); + table.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE"); + table.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE"); + client.createTable(table); + client.dropTable(table.getDbName(), table.getTableName()); + } + } + + @Test + public void testSyncDisabled() throws Exception { + startCluster(/* syncEnabled */ false); + + // A Kudu table should should be allowed to be created via Hive. + Table table = newTable("table"); + client.createTable(table); + // Get the table from the HMS in case any translation occurred. + table = client.getTable(table.getDbName(), table.getTableName()); + + // A Kudu table should should be allowed to be altered via Hive. + // Add a column to the original table. + Table newTable = table.deepCopy(); + newTable.getSd().addToCols(new FieldSchema("b", "int", "")); + client.alter_table(table.getDbName(), table.getTableName(), newTable); + + // A Kudu table should should be allowed to be dropped via Hive. 
+ client.dropTable(table.getDbName(), table.getTableName()); + } + + @Test + public void testKuduMetadataUnchanged() throws Exception { + startCluster(/* syncEnabled */ true); + + Table before = newTable("table"); + + // Changing from external purge to managed is true (and vice versa). + { + Table after = before.deepCopy(); + after.setTableType(TableType.MANAGED_TABLE.name()); + after.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "FALSE"); + after.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE"); + assertTrue(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + assertTrue(KuduMetastorePlugin.kuduMetadataUnchanged(after, before)); + } + + // Changing from external purge to just external is false (and vice versa). + { + Table after = before.deepCopy(); + after.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE"); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(after, before)); + } + + // Changing an unrelated property is true. + { + Table after = before.deepCopy(); + after.putToParameters("some.random.property", "foo"); + assertTrue(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + } + + // Changing location is true. + { + Table after = before.deepCopy(); + after.getSd().setLocation("path/to/foo"); + assertTrue(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + } + + // Changing the master addresses is false. + { + Table after = before.deepCopy(); + after.putToParameters(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY, "somehost"); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + } + + // Changing the table name is false. + { + Table after = before.deepCopy(); + after.setTableName("different"); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + } + + // Changing the table owner is false. + { + Table after = before.deepCopy(); + after.setOwner("different"); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + } + + // Changing the table comment is false. + { + Table after = before.deepCopy(); + after.putToParameters(KuduMetastorePlugin.COMMENT_KEY, "new comment"); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + } + + // Adding a column or removing a column is false. + { + Table after = before.deepCopy(); + after.getSd().addToCols(new FieldSchema("b", "int", "")); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(after, before)); + } + + // Changing a column comment is false. + { + Table after = before.deepCopy(); + after.getSd().getCols().get(0).setComment("new comment"); + assertFalse(KuduMetastorePlugin.kuduMetadataUnchanged(before, after)); + } + } +} diff --git a/java-scala-spark4/kudu-hive/src/test/resources/log4j2.properties b/java-scala-spark4/kudu-hive/src/test/resources/log4j2.properties new file mode 100644 index 0000000000..22762a1560 --- /dev/null +++ b/java-scala-spark4/kudu-hive/src/test/resources/log4j2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT + +logger.kudu.name = org.apache.kudu +logger.kudu.level = debug diff --git a/java-scala-spark4/kudu-jepsen/.gitignore b/java-scala-spark4/kudu-jepsen/.gitignore new file mode 100644 index 0000000000..21252547a6 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/.gitignore @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Ignore files containing information on prior Leiningen runs/sessions. +.lein-failures +.lein-repl-history +# Ignore jepsen files generated by local test runs. +store/ +# Ignore the result of processing README.adoc with the asciidoctor tool. +README.html diff --git a/java-scala-spark4/kudu-jepsen/README.adoc b/java-scala-spark4/kudu-jepsen/README.adoc new file mode 100644 index 0000000000..827bd5e5a8 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/README.adoc @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + += jepsen.kudu + +:author: Kudu Team + +A link:http://clojure.org[Clojure] library designed to run +link:http://kudu.apache.org[Apache Kudu] consistency tests using +the link:https://aphyr.com/tags/Jepsen[Jepsen] framework. Currently, a simple +linearizability test for read/write register is implemented and run +for several fault injection scenarios. 
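+
+For illustration only, the sketch below shows roughly what a single
+read/write-register operation looks like when issued through the Kudu Java
+client. The actual test drives equivalent operations from Clojure; the master
+address, table name, and column names below are made up for the example.
+
+[source,java]
+----
+import org.apache.kudu.client.*;
+
+public class RwRegisterSketch {
+  public static void main(String[] args) throws KuduException {
+    // Hypothetical master address and table/column names; adjust for a real cluster.
+    try (KuduClient client = new KuduClient.KuduClientBuilder("m0:7051").build()) {
+      KuduTable table = client.openTable("rw_register");
+      KuduSession session = client.newSession();
+
+      // A "write" of the register: upsert the value for a given key.
+      Upsert upsert = table.newUpsert();
+      PartialRow row = upsert.getRow();
+      row.addString("key", "r1");
+      row.addString("value", "42");
+      session.apply(upsert);
+      session.flush();
+      session.close();
+
+      // A "read" of the register: scan the row back.
+      KuduScanner scanner = client.newScannerBuilder(table).build();
+      while (scanner.hasMoreRows()) {
+        for (RowResult result : scanner.nextRows()) {
+          System.out.println(result.getString("key") + " = " + result.getString("value"));
+        }
+      }
+    }
+  }
+}
+----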
+
+== Prerequisites and Requirements
+=== Operating System Requirements
+Only Debian/Ubuntu Linux is supported as a platform for the master and tablet
+server nodes. The tests are known to work on Debian 8 Jessie.
+
+== Overview
+The Clojure code is integrated into the project using the
+link:https://github.com/nebula-plugins/nebula-clojure-plugin[nebula-clojure-plugin].
+The kudu-jepsen tests are invoked by executing the `runJepsen` task.
+The parameters are passed via the standard `-Dname=value` notation.
+There is a dedicated Clojure wrapper script
+`kudu_test_runner.clj` in `$KUDU_HOME/java/kudu-jepsen/src/utils` which
+populates the test environment with appropriate properties and iteratively
+runs all the registered tests with different nemesis scenarios.
+
+== Usage
+=== Building
+To build the library, the following components are required:
+
+* JDK 8
+
+To build the project, run the following in the parent directory
+(i.e. `$KUDU_HOME/java`):
+[listing]
+----
+$ ./gradlew clean assemble
+----
+
+=== Running
+The machines for the Kudu master and tserver nodes should be created prior
+to running the test: the tests do not create them. The machines should
+be up and running when the test is started.
+
+To run the test, the following components are required on the control node:
+
+* JDK 8
+* SSH client (and, optionally, an SSH authentication agent)
+* gnuplot (to visualize test results)
+
+Jepsen uses SSH to perform operations on the DB nodes. The kudu-jepsen tests
+assume that SSH keys are installed as follows:
+
+* The public part of the SSH key should be added to the `authorized_keys` file
+  of the `root` user on all DB nodes
+* For the SSH private key, the options are:
+** Add the key to the SSH authentication agent running on the control node
+** Specify the path to the file with the key in plain (non-encrypted) format
+   via the `sshKeyPath` property.
+
+If using an SSH authentication agent to hold the SSH key for DB node access,
+run in the parent directory:
+[listing]
+----
+$ ./gradlew runJepsen -DtserverNodes="t0,t1,t2,t3,t4" -DmasterNodes="m0"
+----
+
+If not using an SSH authentication agent, specify the location of the file with
+the SSH private key via the `sshKeyPath` property:
+[listing]
+----
+$ ./gradlew runJepsen -DtserverNodes="t0,t1,t2,t3,t4" -DmasterNodes="m0" \
+    -DsshKeyPath="/home/user/.ssh/vm_root_id_rsa"
+----
+
+Note that commas (not spaces) are used to separate the names of the nodes. The
+DNS resolver should be properly configured to resolve the specified hostnames
+into IP addresses.
+
+The `tserverNodes` property specifies the set of nodes on which to run Kudu
+tablet servers. The `masterNodes` property specifies the set of nodes on which
+to run Kudu master servers.
+
+In Jepsen terminology, the Kudu master and tserver nodes play the
+*Jepsen DB node* role. The machine where the above-mentioned Gradle command
+is run plays the *Jepsen control node* role.
+
+=== A reference script to build Kudu and run Jepsen tests
+The following link:../../src/kudu/scripts/jepsen.sh[Bourne-again shell script]
+can be used as a reference to build Kudu from source and run the Jepsen tests.
+
+=== Troubleshooting
+When Jepsen's analysis doesn't find inconsistencies in the history of operations,
+it outputs the following at the end of a test:
+[listing]
+----
+Everything looks good! ヽ(‘ー`)ノ
+----
+
+However, that might not be the case. If so, it's crucial to understand why the
+test failed.
+
+The majority of the kudu-jepsen test failures can be put into two classification
+buckets:
+
+* An error happened while setting up the testing environment, contacting
+  machines of the Kudu cluster, starting up Kudu server-side components, or in
+  any of the other third-party components Jepsen uses (like clj-ssh).
+* Jepsen's analysis detected an inconsistent history of operations.
+
+The former class of failures might be a manifestation of a wrong configuration,
+a problem with the test environment, a bug in the test code itself, or some
+other intermittent failure. Usually, encountering issues like that means the
+consistency analysis (which is the last step of a test scenario) cannot run.
+Such issues are reported as _errors_ in the summary message. E.g., the example
+summary message below reports 10 errors in 10 tests run:
+[listing]
+----
+21:41:42 Ran 10 tests containing 10 assertions.
+21:41:42 0 failures, 10 errors.
+----
+To get more details, take a closer look at the output of `./gradlew runJepsen`
+or at the individual `jepsen.log` files in the
+`$KUDU_HOME/java/kudu-jepsen/store/rw-register/` directory. A
+quick way to locate the corresponding section in the error log is to search for
+the `^ERROR in \(` regex pattern. An example error message from Jepsen's output:
+[listing]
+----
+ERROR in (register-test-tserver-random-halves) (KuduException.java:110)
+expected: (:valid? (:results (jepsen/run! (tcasefun opts))))
+  actual: org.apache.kudu.client.NonRecoverableException: can not complete before timeout: KuduRpc(method=IsCreateTableDone, tablet=null, attempt=28, DeadlineTracker(timeout=30000, elapsed=28571), ...
+----
+
+The latter class represents a more serious issue: a manifestation of a
+non-linearizable history of operations. This is reported as a _failure_ in the
+summary message. E.g., the summary message below reports finding 2 instances
+of non-linearizable history among 10 tests run:
+[listing]
+----
+22:21:52 Ran 10 tests containing 10 assertions.
+22:21:52 2 failures, 0 errors.
+----
+
+If Jepsen's analysis finds a non-linearizable history of operations, it outputs
+the following at the end of a test:
+[listing]
+----
+Analysis invalid! (ノಥ益ಥ)ノ ┻━┻
+----
+To troubleshoot, it's first necessary to find where the failed test stores
+its results: it should be one of the timestamp-named sub-directories
+(e.g. `20170109T071938.000-0800`) under
+`$KUDU_HOME/java/kudu-jepsen/store/rw-register` in case of a linearizability
+failure in one of the `rw-register` test scenarios. One possible way
+to find the directory:
+[listing]
+----
+$ cd $KUDU_HOME/java/kudu-jepsen/store/rw-register
+$ find . -name jepsen.log | xargs grep 'Analysis invalid'
+./20170109T071938.000-0800/jepsen.log:Analysis invalid! (ノಥ益ಥ)ノ ┻━┻
+$
+----
+Another way is to find the sub-directories where a `linear.svg` file is present:
+[listing]
+----
+$ cd $KUDU_HOME/java/kudu-jepsen/store/rw-register
+$ find . -name linear.svg
+./20170109T071938.000-0800/linear.svg
+$
+----
+Along with the `jepsen.log` and `history.txt` files, the failed test generates
+a `linear.svg` file (gnuplot is required for that). The diagram in `linear.svg`
+illustrates the part of the history that Jepsen found inconsistent:
+it shows the relationship between time, client operation status, and system
+state, along with the sequences of legal and illegal operation paths. From this
+point, the next step is to locate the corresponding part of the history in the
+`history.txt` file.
Usually the problem appears around an activation interval of the test's
+nemesis scenario. Once found, it's possible to correlate the vicinity of the
+inconsistent operation sequence with the timestamps in the `jepsen.log` file.
+Having the timestamps of the operations and their sequence, it's possible to
+find the relevant messages in the `kudu-tserver.log` and `kudu-master.log` log
+files in the sub-directories named after the Kudu cluster nodes. Hopefully,
+that information is enough to create a reproducible scenario for further
+troubleshooting and debugging.
diff --git a/java-scala-spark4/kudu-jepsen/build.gradle b/java-scala-spark4/kudu-jepsen/build.gradle
new file mode 100644
index 0000000000..c23082a2fc
--- /dev/null
+++ b/java-scala-spark4/kudu-jepsen/build.gradle
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+apply plugin: "nebula.clojure"
+
+// Clojure doesn't support Java 12+
+// https://clojure.org/releases/downloads#_java_compatibility
+if(JavaVersion.current().isJava12Compatible()) {
+  // Disable all tasks for the `kudu-jepsen` sub-project.
+  gradle.taskGraph.whenReady {
+    gradle.taskGraph.allTasks.each {
+      it.onlyIf { it.project != project }
+    }
+  }
+}
+
+repositories {
+  maven { url "https://clojars.org/repo/" }
+}
+
+dependencies {
+  compile project(path: ":kudu-client", configuration: "shadow")
+  compile project(path: ":kudu-test-utils", configuration: "shadow")
+  compile libs.clojure
+  compile libs.clojureToolsCli
+  compile libs.jepsen
+
+  optional libs.yetusAnnotations
+}
+
+compileClojure {
+  aotCompile = true
+}
+// Jepsen tests require specific infrastructure and do not run as part of the regular tests.
+clojureTest.enabled = false
+
+// Run the Jepsen tests.
+task runJepsen(type: JavaExec) {
+  def masterNodes = propertyWithDefault("masterNodes", "m0")
+  def tserverNodes = propertyWithDefault("tserverNodes", "t0,t1,t2,t3,t4")
+  def sshKeyPath = propertyWithDefault("sshKeyPath", "")
+  def iterNum = propertyWithDefault("iterNum", "1")
+  classpath sourceSets.main.clojure.srcDirs,
+      sourceSets.test.clojure.srcDirs,
+      sourceSets.test.runtimeClasspath,
+      sourceSets.main.runtimeClasspath
+  main = "clojure.main"
+  args = [
+      "$projectDir/src/utils/kudu_test_runner.clj",
+      "--masters=${masterNodes}",
+      "--tservers=${tserverNodes}",
+      "--ssh-key-path=${sshKeyPath}",
+      "--iter-num=${iterNum}"
+  ]
+}
+
+// We don't publish kudu-jepsen
+uploadArchives.enabled = false
+install.enabled = false
+
+// SpotBugs doesn't work on Clojure.
+spotbugsMain.enabled = false +spotbugsTest.enabled = false \ No newline at end of file diff --git a/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu.clj b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu.clj new file mode 100644 index 0000000000..6c896ab4c8 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu.clj @@ -0,0 +1,97 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. See the License for the +;; specific language governing permissions and limitations +;; under the License. + +(ns jepsen.kudu + "Tests for Apache Kudu" + (:require [clojure.tools.logging :refer :all] + [jepsen + [control :as c] + [db :as db] + [net :as net] + [util :as util :refer [meh]]] + [jepsen.control.net :as cnet :refer [heal]] + [jepsen.os.debian :as debian] + [jepsen.kudu.util :as ku])) + +(defn db + "The setup/teardown procedure for a Kudu node. A node can run either + a master or a tablet server." + [] + (reify db/DB + (setup! [_ test node] + (info node "Setting up environment") + (c/su + ;; Restore the network. This is to clean-up left-overs from prior + ;; nemesis-induced grudges. + (meh (cnet/heal)) + + (c/exec :service :rsyslog :start) + + (ku/prepare-node test node) + (ku/sync-time test node) + (ku/start-kudu test node)) + (info node "Kudu ready")) + + (teardown! [_ test node] + (info node "Tearing down Kudu") + (c/su + (when (.contains (:tservers test) node) + (ku/stop-kudu-tserver test node)) + (when (.contains (:masters test) node) + (ku/stop-kudu-master test node))) + ;; TODO collect table data for debugging + (info node "Kudu stopped")) + + db/LogFiles + (log-files [_ test node] + (cond-> [] + (.contains (:tservers test) node) (conj ku/kudu-tserver-log-file) + (.contains (:masters test) node) (conj ku/kudu-master-log-file))))) + + +(defn merge-options + "Merges the common options for all Kudu tests with the specific options + set on the test itself. This does not include 'db' or 'nodes'." + [opts] + (let [default-opts {:os debian/os + :net net/iptables + :db (db) + ;; The list of nodes that will run tablet servers. + :tservers [:n1 :n2 :n3 :n4 :n5] + ;; The list of nodes that will run the kudu master. + :masters [:m1] + :table-name + (str (:name opts) "-" (System/currentTimeMillis)) + :ts-hb-interval-ms 1000 + :ts-hb-max-failures-before-backoff 3 + :ts-raft-hb-interval-ms 50 + :ranges []} + + custom-opts (merge default-opts opts) + + derived-opts {:master-addresses + (ku/concatenate-addresses ku/master-rpc-port + (:masters custom-opts)) + :nodes (vec (concat (:tservers custom-opts) + (:masters custom-opts)))}] + (merge custom-opts derived-opts))) + +;; Common setup for all kudu tests. +(defn kudu-test + "Sets up the test parameters." 
+ [opts] + (merge-options opts)) diff --git a/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj new file mode 100644 index 0000000000..f24804fcaa --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj @@ -0,0 +1,93 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. See the License for the +;; specific language governing permissions and limitations +;; under the License. + +(ns jepsen.kudu.client + "Thin wrappers around Kudu Java client." + (:require [clojure.tools.logging :refer :all]) + (:import [org.apache.kudu ColumnSchema + ColumnSchema$ColumnSchemaBuilder + Schema + Type]) + (:import [org.apache.kudu.client AbstractKuduScannerBuilder + AsyncKuduScanner$ReadMode + CreateTableOptions + KuduClient + KuduClient$KuduClientBuilder + KuduPredicate + KuduPredicate$ComparisonOp + KuduScanner + KuduSession + KuduTable + OperationResponse + PartialRow + RowResult + RowResultIterator])) + +(defn sync-client + "Builds and returns a new synchronous Kudu client." + [master-addresses] + (let [builder (new KuduClient$KuduClientBuilder master-addresses)] + (.defaultAdminOperationTimeoutMs builder 120000) + (.build builder))) + +(defn close-client + [sync-client] + (try (.close sync-client) (catch Exception e (info "Error closing client: " e)))) + +(defn column-schema + ([name type] (column-schema name type false)) + ([name type key?] + (-> (new ColumnSchema$ColumnSchemaBuilder name, type) (.key key?) .build))) + +(defn create-table + [sync-client name schema options] + (.createTable sync-client name schema options)) + +(defn open-table + [sync-client name] + (.openTable sync-client name)) + +(defn rr->tuple + "Transforms a RowResult into a tuple." + [row-result] + (let [columns (-> row-result .getSchema .getColumns)] + (into {} + (for [[idx column] (map-indexed vector columns)] + (let [name (.getName column) + type (.getType column) + value (condp = type + Type/INT8 (.getByte row-result idx) + Type/INT16 (.getShort row-result idx) + Type/INT32 (.getInt row-result idx) + Type/INT64 (.getLong row-result idx) + Type/BINARY (.getBinaryCopy row-result idx) + Type/STRING (.getString row-result idx) + Type/BOOL (.getBoolean row-result idx) + Type/FLOAT (.getFloat row-result idx) + Type/DOUBLE (.getDouble row-result idx) + Type/UNIXTIME_MICROS (.getLong row-result idx))] + {(keyword name) value}))))) + +(defn drain-scanner-to-tuples + "Drains a scanner to a vector of tuples." + [scanner] + (let [result (atom [])] + (while (.hasMoreRows scanner) + (let [rr-iter (.nextRows scanner)] + (while (.hasNext rr-iter) + (swap! 
result conj (rr->tuple (.next rr-iter)))))) + @result)) diff --git a/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj new file mode 100644 index 0000000000..0a12b332b7 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj @@ -0,0 +1,131 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. See the License for the +;; specific language governing permissions and limitations +;; under the License. + +(ns jepsen.kudu.nemesis + "Nemeses for Apache Kudu." + (:refer-clojure :exclude [test]) + (:require [jepsen + [control :as c] + [nemesis :as nm] + [net :as net] + [util :as util]] + [clojure.tools.logging :refer :all] + [jepsen.kudu.util :as ku])) + + +(defn tserver-partitioner + "Tablet server partitioner: cut network links between tablet servers + in response to :start operation: cut network links as defined by + (grudge nodes), and restore them back in response to :stop operation." + [grudge] + (reify jepsen.client/Client + (setup! [this test _] + (net/heal! (:net test) test) + this) + + (invoke! [this test op] + (case (:f op) + :start (let [grudge (grudge (:tservers test))] + (nm/partition! test grudge) + (assoc op :value (str "Cut off " (pr-str grudge)))) + :stop (do (net/heal! (:net test) test) + (assoc op :value "fully connected")))) + + (teardown! [this test] + (net/heal! (:net test) test)))) + + +(defn tserver-start-stopper + "Takes a targeting function which, given a list of nodes, returns a single + node or collection of nodes to affect, and two functions `(start! test node)` + invoked on nemesis start, and `(stop! test node)` invoked on nemesis stop. + Returns a nemesis which responds to :start and :stop by running the start! + and stop! fns on each of the given nodes. During `start!` and `stop!`, binds + the `jepsen.control` session to the given node, so you can just call `(c/exec + ...)`. + + Re-selects a fresh node (or nodes) for each start--if targeter returns nil, + skips the start. The return values from the start and stop fns will become + the :values of the returned :info operations from the nemesis, e.g.: + + {:value {:n1 [:killed \"java\"]}}" + [targeter start! stop!] + (let [nodes (atom nil)] + (reify jepsen.client/Client + (setup! [this test _] this) + + (invoke! [this test op] + (locking nodes + (assoc op :type :info, :value + (case (:f op) + :start (if-let [ns (-> test :tservers targeter util/coll)] + (if (compare-and-set! nodes nil ns) + (c/on-many ns (start! test c/*host*)) + (str "nemesis already disrupting " + (pr-str @nodes))) + :no-target) + :stop (if-let [ns @nodes] + (let [value (c/on-many ns (stop! test c/*host*))] + (reset! nodes nil) + value) + :not-started))))) + + (teardown! 
[this test])))) + + +(defn tserver-partition-random-halves + "Cuts the tablet servers' network into randomly chosen halves." + [] + (tserver-partitioner (comp nm/complete-grudge nm/bisect shuffle))) + + +(defn tserver-partition-majorities-ring + "A grudge in which every tablet server can see a majority, but no server sees + the *same* majority as any other." + [] + (tserver-partitioner nm/majorities-ring)) + + +(defn kill-restart-tserver + "Responds to `{:f :start}` by sending SIGKILL to the tablet server on a given + node, and when `{:f :stop}` arrives, re-starts the specified tablet server. + Picks the node(s) using `(targeter list-of-nodes)`. Targeter may return + either a single node or a collection of nodes." + ([targeter] + (tserver-start-stopper targeter + (fn start [t n] + (c/su (c/exec :killall :-s :SIGKILL :kudu-tserver)) + [:killed :kudu-tserver]) + (fn stop [t n] + (ku/start-kudu-tserver t n) + [:started :kudu-tserver])))) + +(defn tserver-hammer-time + "Responds to `{:f :start}` by pausing the tablet server name on a given node + using SIGSTOP, and when `{:f :stop}` arrives, resumes it with SIGCONT. + Picks the node(s) to pause using `(targeter list-of-nodes)`, which defaults + to `rand-nth`. Targeter may return either a single node or a collection + of nodes." + ([] (tserver-hammer-time rand-nth)) + ([targeter] + (tserver-start-stopper targeter + (fn start [t n] + (c/su (c/exec :killall :-s "STOP" :kudu-tserver)) + [:paused :kudu-tserver]) + (fn stop [t n] + (c/su (c/exec :killall :-s "CONT" :kudu-tserver)) + [:resumed :kudu-tserver])))) diff --git a/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj new file mode 100644 index 0000000000..4c08b6418c --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj @@ -0,0 +1,88 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. See the License for the +;; specific language governing permissions and limitations +;; under the License. + +(ns jepsen.kudu.register + "Simple linearizability test for a read/write register." + (:refer-clojure :exclude [test]) + (:require [jepsen + [kudu :as kudu] + [checker :as checker] + [generator :as gen] + [nemesis :as nemesis]] + [jepsen.kudu.client :as kc] + [jepsen.kudu.table :as kt] + [knossos.model :as model])) + +(def register-key "x") + +(defn r [_ _] {:type :invoke, :f :read, :value nil}) +(defn w [_ _] {:type :invoke, :f :write, :value (rand-int 10)}) + +(defn client + [client-atom kclient ktable] + (reify jepsen.client/Client + (setup! [_ test _] + "Create the client and the test table. Use the same Kudu client instance " + "across all test actors to achieve timestamp propagation for all " + "operations." + (let [table-name (:table-name test) + kclient (locking client-atom + (when (compare-and-set! 
+ client-atom nil (kc/sync-client (:master-addresses test))) + (kc/create-table @client-atom table-name kt/kv-table-schema + (let [ranges (:table-ranges test) + rep-factor (:num-replicas test)] + (if (nil? ranges) + (kt/kv-table-options-hash rep-factor (count (:tservers test))) + (kt/kv-table-options-range rep-factor ranges)))) + @client-atom) + @client-atom) + ktable (kc/open-table kclient table-name)] + (client client-atom kclient ktable))) + + (invoke! [_ _ op] + (case (:f op) + :read (assoc op :type :ok, + :value (kt/kv-read kclient ktable register-key)) + :write (do (kt/kv-write kclient ktable register-key (:value op)) + (assoc op :type :ok)))) + + (teardown! [_ _] + (kc/close-client kclient)))) + +(defn register-test + [opts] + (kudu/kudu-test + (merge + {:name "rw-register" + :client (client (atom nil) nil nil) + :concurrency 10 + :num-replicas 5 + :nemesis nemesis/noop + :model (model/register) + :generator (->> (gen/reserve 5 (gen/mix [w r]) r) + (gen/stagger 1/3) + (gen/nemesis + (gen/seq (cycle [(gen/sleep 5) + {:type :info, :f :start} + (gen/sleep 5) + {:type :info, :f :stop}]))) + (gen/time-limit 60)) + :checker (checker/compose + {:perf (checker/perf) + :linear checker/linearizable})} + opts))) diff --git a/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/sets.clj b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/sets.clj new file mode 100644 index 0000000000..9bfdcd6999 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/sets.clj @@ -0,0 +1,217 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. See the License for the +;; specific language governing permissions and limitations +;; under the License. + +;; The 'sets' checker validates that: +;; 1) the total row count of a shared table read by a client is +;; greater than or equal to the count of successful writes +;; performed by that client. +;; 2) the row count never goes down from the previous reads of +;; the same client. +;; +;; For example, sets' checker considers a history as follows valid: +;; +;; [{:type :invoke, :f :add, :value 1, :process 0} +;; {:type :ok, :f :add, :value 1, :process 0} +;; {:type :invoke, :f :count, :value nil, :process 1} +;; {:type :ok, :f :count, :value 2, :process 1} +;; {:type :invoke, :f :add, :value 22, :process 0} +;; {:type :ok, :f :add, :value 22, :process 0} +;; {:type :invoke, :f :count, :value nil, :process 0} +;; {:type :ok, :f :count, :value 2, :process 0}] +;; +;; while the following sequence as invalid. 
Since after process 0 +;; successfully inserted three rows, the total number of rows +;; returned from the count operation of the same process is two, +;; which conflicts with the validation constraints: +;; +;; [{:type :invoke, :f :add, :value 1, :process 0} +;; {:type :ok, :f :add, :value 1, :process 0} +;; {:type :invoke, :f :add, :value 2, :process 0} +;; {:type :ok, :f :add, :value 2, :process 0} +;; {:type :invoke, :f :count, :value nil, :process 0} +;; {:type :ok, :f :count, :value 2, :process 0} +;; {:type :invoke, :f :count, :value nil, :process 1} +;; {:type :ok, :f :count, :value 2, :process 1} +;; {:type :invoke, :f :add, :value 13, :process 1} +;: {:type :ok, :f :add, :value 13, :process 1} +;; {:type :invoke, :f :add, :value 22, :process 0} +;; {:type :ok, :f :add, :value 22, :process 0} +;; {:type :invoke, :f :count, :value nil, :process 0} +;; {:type :ok, :f :count, :value 2, :process 0} +;; {:type :invoke, :f :add, :value 250, :process 1} +;; {:type :ok, :f :add, :value 250, :process 1}] +;; +;; This checker model is a variation of cockroach 'sets' checker, +;; see https://github.com/jepsen-io/jepsen/blob/master/cockroachdb/src/jepsen/cockroach/sets.clj. +(ns jepsen.kudu.sets + "Set test" + (:refer-clojure :exclude [test]) + (:require [jepsen [kudu :as kudu] + [client :as client] + [checker :as checker] + [generator :as gen] + [nemesis :as nemesis]] + [jepsen.kudu.client :as kc] + [jepsen.kudu.table :as kt] + [clojure.core.reducers :as r] + [knossos.op :as op]) + (:import [org.apache.kudu.client AsyncKuduScanner$ReadMode])) + +(defn add-op [] + "The add operation to be performed by the processes." + (->> (range) + (map (partial array-map + :type :invoke + :f :add + :value)))) + +(defn count-op [] + "The count operation to be performed by the processes." + {:type :invoke, :f :count, :value nil}) + +(defn count-by-set + "Given a set of :add operations or a :count operation, count the number + of successful :add operations or get the value from the :count operation." + [history] + (let [adds (->> history + (r/filter op/ok?) + (r/filter #(= :add (:f %))) + (r/map :value) + (into #{})) + counts (->> history + (r/filter op/ok?) + (r/filter #(= :count (:f %))) + (r/map :value) + (reduce (fn [_ x] x) nil))] + {:counts counts :adds (count adds)})) + +(defn validate-counts + "Validate a series of :add and :count operations, making sure that: + 1) the total row count of the table read is greater than or equal + to the count of successful writes. + 2) the count never goes down from the previous counts." + [history] + (let [results (reduce-kv + (fn [coll key value] + (let [last-coll (last coll) + last-count (:total-counts last-coll) + adds (+ (:total-adds last-coll 0) (:adds value))] + (if (= (:counts value) nil) + (conj coll (assoc value :total-counts last-count :total-adds adds + :valid? (:valid? last-coll))) + ;; validates the total row count is greater than or equal + ;; to the count of writes, and the row count never goes down. + (let [is-valid (and (<= adds (:counts value)) + (<= last-count (:counts value)))] + (conj coll (assoc value :total-counts (:counts value) :total-adds adds + :valid? (and is-valid (:valid? last-coll)))))))) + [{:valid? true :total-counts 0 :total-adds 0 }] + history)] + results)) + +(defn partition-by-set + "Given a history of operations, partition it to multiple groups of + sets. Each set is composed of a set of :add operations followed by + a :count operation. And validate each set." 
+ [history] + (let [partition-history (->> history + (partition-by #(and (= :count (:f %)) + (= :ok (:type %)))) + (into []))] + + (let [count-map (reduce-kv + (fn [coll key value] + (assoc coll key (count-by-set value))) + {} + partition-history)] + (let [result (->> count-map + (sort) + (vals) + (remove #(and (= (:counts %) nil) (= (:adds %) 0))) + (into []) + (validate-counts) + (last) + (:valid?))] + {:valid? result})))) + +(defn check-sets + "Given the history of all processes/clients, validate the history per + process/clients." + [] + (reify checker/Checker + (check [this test model history opts] + (let [results (reduce-kv + (fn [coll key val] + (assoc coll key (partition-by-set val))) {} (group-by :process history))] + (assoc results :valid? (jepsen.checker/merge-valid (mapcat vals (vals results)))))))) + +(defn client + "Create a shared table if it doesn't exist. The client can perform + two kinds of operations, :add that inserts a unique row into the + table, :count that counts the number of rows of the table." + [table-created? kclient ktable read-mode] + (reify client/Client + (setup! [_ test _] + (let [kclient (kc/sync-client (:master-addresses test)) + table-name (:table-name test) + ktable (locking table-created? + (when (compare-and-set! table-created? false true) + (kc/create-table kclient table-name kt/kv-table-schema + (let [ranges (:table-ranges test) + rep-factor (:num-replicas test)] + (if (nil? ranges) + (kt/kv-table-options-hash rep-factor (count (:tservers test))) + (kt/kv-table-options-range rep-factor ranges))))) + (kc/open-table kclient table-name))] + (client table-created? kclient ktable read-mode))) + + (invoke! [_ _ op] + (case (:f op) + :count (assoc op :type :ok, + :value (kt/count-rows kclient ktable read-mode)) + :add (do (kt/kv-write kclient ktable (str (:value op)) (:value op)) + (assoc op :type :ok)))) + (teardown! [_ _] + (kc/close-client kclient)))) + +(defn sets-test + "This test creates multiple clients. Each client either writes a + unique value in a shared table or counts the number of rows for that + table concurrently. It uses 'sets' checker to validate Read-Your-Writes + and Read-Your-Reads consistency." + [opts] + (kudu/kudu-test + (merge + {:name "sets" + :client (client (atom false) nil nil AsyncKuduScanner$ReadMode/READ_YOUR_WRITES) + :concurrency 10 + :num-replicas 5 + :nemesis nemesis/noop + ;; generator take a random mixture of add operations (that inserts + ;; a sequence of values) and count operations. + :generator (->> (gen/mix [(->> (add-op) + gen/seq + (gen/stagger 1)) count-op]) + (gen/stagger 1/3) + (gen/nemesis + (gen/seq (cycle [(gen/sleep 5) + {:type :info, :f :start} + (gen/sleep 5) + {:type :info, :f :stop}]))) + (gen/time-limit 60)) + :checker (check-sets)} + opts))) \ No newline at end of file diff --git a/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj new file mode 100644 index 0000000000..a58acb3b9a --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj @@ -0,0 +1,115 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. 
You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. See the License for the +;; specific language governing permissions and limitations +;; under the License. + +(ns jepsen.kudu.table + "Utilities to work with kudu tables, for testing." + (:require [clojure.tools.logging :refer :all] + [jepsen.kudu.client :as c]) + (:import [org.apache.kudu ColumnSchema + ColumnSchema$ColumnSchemaBuilder + Schema + Type]) + (:import [org.apache.kudu.client AbstractKuduScannerBuilder + AsyncKuduScanner$ReadMode + CreateTableOptions + KuduClient + KuduClient$KuduClientBuilder + KuduPredicate + KuduPredicate$ComparisonOp + KuduScanner + KuduSession + KuduTable + OperationResponse + PartialRow + ReplicaSelection + RowResult + RowResultIterator + Upsert])) +;; +;; KV Table utilities +;; + +;; Creates a basic schema for a Key/Value table where the key is a string and +;; the value is an int. +(def kv-table-schema + (new Schema [(c/column-schema "key" Type/STRING true) + (c/column-schema "value" Type/INT32 false)])) + + +(defn kv-table-options-range + "Returns options to create a K/V table with partitions on 'ranges'. + Ranges should be a vector of [start, end) keys. The resulting table + has (count ranges) tablets with the exact coverage set on the ranges. + The resulting table has the specified replication factor." + [num-replicas ranges] + (let [options (new CreateTableOptions)] + (.setRangePartitionColumns options ["key"]) + (.setNumReplicas options num-replicas) + (doseq [range ranges] + (let [lower (.newPartialRow kv-table-schema) + upper (.newPartialRow kv-table-schema)] + (.addString lower "key" (get range 0)) + (.addString upper "key" (get range 1)) + (.addRangePartition options lower upper))) + options)) + + +(defn kv-table-options-hash + "Returns options to create a K/V table with key column hash partitioned + into the given number of buckets. The resulting table has the specified + replication factor." + [num-replicas buckets-num] + (let [options (new CreateTableOptions)] + (.setRangePartitionColumns options ["key"]) + (.setNumReplicas options num-replicas) + (.addHashPartitions options ["key"] buckets-num) + options)) + + +(defn kv-write + "Upsert a row on a KV table." + [sync-client table key value] + (let [upsert (.newUpsert table) + row (.getRow upsert)] + (.addString row "key" key) + (.addInt row "value" (int value)) + (let [response (.apply (.newSession sync-client) upsert)] + (assert (not (.hasRowError response)) (str "Got a row error: " response))))) + +(defn kv-read + "Read the value associated with key." + [sync-client table key] + (let [scanner-builder (.newScannerBuilder sync-client table) + predicate (KuduPredicate/newComparisonPredicate (c/column-schema "key" Type/STRING) + KuduPredicate$ComparisonOp/EQUAL + key)] + (.readMode scanner-builder AsyncKuduScanner$ReadMode/READ_AT_SNAPSHOT) + (.addPredicate scanner-builder predicate) + (let [rows (c/drain-scanner-to-tuples (.build scanner-builder))] + (case (count rows) + 0 nil + 1 (:value (get rows 0)) + (assert false (str "Expected 0 or 1 rows. Got: " (count rows))))))) + +(defn count-rows + "Count the rows of the given table." 
+ [sync-client table read-mode] + (let [scanner-builder (.newScannerBuilder sync-client table)] + (.readMode scanner-builder read-mode) + (.replicaSelection scanner-builder ReplicaSelection/CLOSEST_REPLICA) + (let [rows (c/drain-scanner-to-tuples (.build scanner-builder))] + (count rows)))) diff --git a/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj new file mode 100644 index 0000000000..a2b82214b3 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj @@ -0,0 +1,424 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. See the License for the +;; specific language governing permissions and limitations +;; under the License. + +(ns jepsen.kudu.util + "Utilities for Apache Kudu jepsen tests" + (:require [clojure.tools.logging :refer :all] + [clojure.java.io :as io] + [clojure.string :as str] + [jepsen + [control :as c :refer [|]] + [util :as util :refer [meh]]] + [jepsen.control.util :as cu] + [jepsen.os.debian :as debian])) + +(defn path + "Returns the filesystem path for the path components joined with Unix path + separator." + [& components] + (str/join "/" components)) + +;; TODO(aserbin): make it possible to set the version via a run-time option. +;; +;; The empty string corresponds to the latest snapshot from the main trunk. +;; To run against some other branch, set to ".."; +;; e.g. set "1.2.0" to run against packages built for Kudu 1.2.0 release. +(def kudu-pkg-version "") + +(def kudu-repo-url + (str "http://repos.jenkins.cloudera.com/kudu" kudu-pkg-version + "-nightly/debian/jessie/amd64/kudu")) +(def kudu-repo-name "kudu-nightly") +(def kudu-repo-apt-line (str "deb " kudu-repo-url " jessie-kudu contrib")) +(def kudu-required-packages + "The set of the required system packages (more are installed by dependency)." 
+ [:libsasl2-modules + :libsasl2-modules-gssapi-mit + :lsb-release + :ntp + :openssl]) +(def kudu-master-pkg :kudu-master) +(def kudu-tserver-pkg :kudu-tserver) + +(def kudu-build-dir "../../build/latest") + +(def kudu-conf-dir "/etc/kudu/conf") +(def kudu-master-gflagfile (path kudu-conf-dir "master.gflagfile")) +(def kudu-tserver-gflagfile (path kudu-conf-dir "tserver.gflagfile")) +(def kudu-target-bin-dir "/opt/local/bin") +(def kudu-target-sbin-dir "/opt/local/sbin") +(def kudu-uname "kudu") +(def kudu-uid 999) +(def kudu-gname "kudu") +(def kudu-gid 999) +(def kudu-home-dir "/var/lib/kudu") +(def kudu-master-home-dir (path kudu-home-dir "master")) +(def kudu-tserver-home-dir (path kudu-home-dir "tserver")) +(def kudu-log-dir "/var/log/kudu") +(def kudu-master-log-file (path kudu-log-dir "kudu-master.log")) +(def kudu-tserver-log-file (path kudu-log-dir "kudu-tserver.log")) +(def kudu-run-dir "/var/run/kudu") +(def kudu-master-pid-file (path kudu-run-dir "kudu-master-kudu.pid")) +(def kudu-tserver-pid-file (path kudu-run-dir "kudu-tserver-kudu.pid")) + +(def master-rpc-port 7051) +(def tserver-rpc-port 7050) + +(defn kudu-cli + "Returns path to the Kudu CLI tool or just binary name, if it's appropriate + to rely on the PATH environment variable." + [test] + (if (:use-packages? test) + "kudu" ;; relying on the standard PATH env variable + (path kudu-target-bin-dir "kudu"))) + + +(defn concatenate-addresses + "Returns a list of the addresses in form 'h0:port,h1:port,h2:port' + given a port and list of hostnames." + [port hosts] + (str/join "," (map #(str (name %) ":" (str port)) hosts))) + + +(defn group-exist? + "If the specified group exists?" + [group-name] + (try (c/exec :egrep (str "^" group-name) "/etc/group") + true + (catch RuntimeException _ false))) + + +(defn user-exist? + "If the specified user exists?" + [user-name] + (try (c/exec :id user-name) + true + (catch RuntimeException _ false))) + + +(defn ntp-in-sync? + "Is the NTP server in sync state? This function should be called in the + context of already established SSH session at the node." + [] + (try (c/exec :ntp-wait :-n1 :-s1) + true + (catch RuntimeException _ false))) + + +(defn kudu-master-in-service? + "Is the Kudu master process at the specified node in service already? + This function should be called in the context of already established SSH + session at the node." + [test node] + (try (c/exec :sudo :-u kudu-uname (kudu-cli test) :table :list node) + true + (catch RuntimeException _ false))) + + +(defn kudu-master-see-tservers? + "Whether the Kudu master sees the specified number of tablet servers." + [test node tservers-count] + (let [count-regex (str "^"(str tservers-count)"$")] + (try (c/exec :sudo :-u kudu-uname + (kudu-cli test) :tserver :list node :--format=space | + :wc :-l | + :tr :-d (c/lit "[:space:]") | + :grep :-E count-regex) + true + (catch RuntimeException _ false)))) + + +(defn kudu-tserver-in-service? + "Is the Kudu tserver process at the specified node up and running? + This function should be called in the context of already established SSH + session at the node." + [test node] + (try (c/exec :sudo :-u kudu-uname (kudu-cli test) :tserver :status node) + true + (catch RuntimeException _ false))) + + +(defn start-kudu-master + "Start Kudu master daemon at the specified node. This function should + be called in the super-user context (jepsen.control/su)." + [test node] + (info node "Starting Kudu Master") + (let [use-svc-scripts? (:use-packages? test)] + (if use-svc-scripts? 
+ (c/exec :service :kudu-master :start) + (c/exec :sudo :-u kudu-uname :start-stop-daemon + :--start + :--background + :--make-pidfile + :--pidfile kudu-master-pid-file + :--chdir kudu-home-dir + :--no-close + :--oknodo + :--exec (path kudu-target-sbin-dir "kudu-master") + :-- + :--flagfile kudu-master-gflagfile + :>> kudu-master-log-file (c/lit "2>&1")))) + + ;; Wait for master services avaiable (awaiting for the catalog manager). + (loop [iteration 0] + (when-not (kudu-master-in-service? test node) + (if (> iteration 100) + (c/exec :echo "timeout waiting for master server to start" (c/lit ";") + :false) + (do + (Thread/sleep 500) + (recur (inc iteration)))))) + + ;; Wait until the master sees all tservers in the cluster. Otherwise + ;; the client would not be able to create a table with the desired + ;; replication factor when not all tservers have registered yet. + (let [tservers-count (count (:tservers test))] + (loop [iteration 0] + (when-not (kudu-master-see-tservers? test node tservers-count) + (if (> iteration 200) + (c/exec :echo "timeout waiting for all tservers to start" (c/lit ";") + :false) + (do + (Thread/sleep 500) + (recur (inc iteration)))))))) + + +(defn stop-kudu-master + "Stop Kudu master daemon at the specified node. This function should be + called in the super-user context (jepsen.control/su)." + [test node] + (info node "Stopping Kudu Master") + (let [use-svc-scripts? (:use-packages? test)] + (if use-svc-scripts? + (meh (c/exec :service :kudu-master :stop)) + (cu/stop-daemon! "kudu-master" kudu-master-pid-file)))) + + +(defn start-kudu-tserver + "Start Kudu tablet server daemon at the specified node. This function + should be called in the super-user context (jepsen.control/su)." + [test node] + (info node "Starting Kudu Tablet Server") + (let [use-svc-scripts? (:use-packages? test)] + (if use-svc-scripts? + (c/exec :service :kudu-tserver :start) + (c/exec :sudo :-u kudu-uname :start-stop-daemon + :--start + :--background + :--make-pidfile + :--pidfile kudu-tserver-pid-file + :--chdir kudu-home-dir + :--no-close + :--oknodo + :--exec (path kudu-target-sbin-dir "kudu-tserver") + :-- + :--flagfile kudu-tserver-gflagfile + :>> kudu-tserver-log-file (c/lit "2>&1")))) + + ;; Wait for the tablet server to become on-line. + (loop [iteration 0] + (when-not (kudu-tserver-in-service? test node) + (if (> iteration 100) + (c/exec :echo "timeout waiting for tablet server to start" (c/lit ";") + :false) + (do + (Thread/sleep 500) + (recur (inc iteration))))))) + + +(defn stop-kudu-tserver + "Stops Kudu Tablet Server on the specified node." + [test node] + (info node "Stopping Kudu Tablet Server") + (let [use-svc-scripts? (:use-packages? test)] + (if use-svc-scripts? + (meh (c/exec :service :kudu-tserver :stop)) + (cu/stop-daemon! "kudu-tserver" kudu-tserver-pid-file)))) + + +(defn kudu-cfg-master + "Returns Kudu master flags file contents." + [test] + (let [data-path kudu-master-home-dir + flags [(str "--fs_wal_dir=" data-path) + (str "--fs_data_dirs=" data-path) + (str "--log_dir=" kudu-log-dir) + (str "--rpc_bind_addresses=0.0.0.0:" (str master-rpc-port))]] + ;; Only set the master addresses when there is more than one master + (str/join "\n" + (if (> (count (:masters test)) 1) + (conj flags (str "--master_addresses=" + (concatenate-addresses master-rpc-port + (:masters test)))) + flags)))) + + +(defn kudu-cfg-tserver + "Returns Kudu tserver flags file contents." 
+ [test] + (let [data-path kudu-tserver-home-dir + flags [(str "--fs_wal_dir=" data-path) + (str "--fs_data_dirs=" data-path) + (str "--log_dir=" kudu-log-dir) + (str "--rpc_bind_addresses=0.0.0.0:" (str tserver-rpc-port)) + (str "--heartbeat_interval_ms=" + (str (:ts-hb-interval-ms test))) + (str "--raft_heartbeat_interval_ms=" + (str (:ts-raft-hb-interval-ms test))) + (str "--heartbeat_max_failures_before_backoff=" + (str (:ts-hb-max-failures-before-backoff test)))]] + (str/join "\n" (conj flags (str "--tserver_master_addrs=" + (concatenate-addresses master-rpc-port + (:masters test))))))) + + +(defn ntp-server-config + "Returns ntp.conf contents for Kudu master node." + [] + (let [common-opts (slurp (io/resource "ntp.conf.common")) + server-opts (slurp (io/resource "ntp.conf.server"))] + (str common-opts "\n" server-opts))) + + +(defn ntp-slave-config + "Returns ntp.conf contents for Kudu tserver node." + [servers] + (let [common-opts (slurp (io/resource "ntp.conf.common")) + server-lines (map #(str "server " (name %) + " burst iburst prefer minpoll 4 maxpoll 4") + servers)] + (str common-opts "\n" (str/join "\n" server-lines)))) + + +(defn prepare-node-with-pkgs + "Prepare a Kudu node: install Kudu using packages." + [test node] + (let [repo-file (str "/etc/apt/sources.list.d/" + (name kudu-repo-name) ".list")] + (when-not (cu/exists? repo-file) + (info node "Adding " kudu-repo-name " package repositoy") + (debian/add-repo! kudu-repo-name kudu-repo-apt-line) + (info node "Fetching " kudu-repo-name " package key") + (c/exec :curl :-fLSs (str kudu-repo-url "/" "archive.key") | + :apt-key :add :-) + (info node "Updating package index") + (debian/update!))) + + (when (.contains (:masters test) node) + (when-not (debian/installed? kudu-master-pkg) + (info node "Installing kudu-master package") + (debian/install kudu-master-pkg))) + (when (.contains (:tservers test) node) + (when-not (debian/installed? kudu-tserver-pkg) + (info node "Installing kudu-tserver package") + (debian/install kudu-tserver-pkg)))) + + +(defn prepare-node-with-binaries + "Prepare Kudu node: create the directory structure and place necessary + Kudu binaries at place." + [test node] + + (when-not (group-exist? kudu-gname) + (c/exec :groupadd :-o :-g kudu-gid kudu-gname)) + (when-not (user-exist? kudu-uname) + (c/exec :useradd :-o :-u kudu-uid :-g kudu-gname :-d kudu-home-dir + :-s "/usr/sbin/nologin" kudu-uname)) + + ;; Prepare directory structure for the files. + (c/exec :mkdir :-p kudu-conf-dir) + (when (.contains (:masters test) node) + (c/exec :mkdir :-p path kudu-master-home-dir)) + (when (.contains (:tservers test) node) + (c/exec :mkdir :-p path kudu-tserver-home-dir)) + (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-home-dir) + + (c/exec :mkdir :-p kudu-run-dir) + (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-run-dir) + + (c/exec :mkdir :-p kudu-log-dir) + (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-log-dir) + + (c/exec :rm :-rf kudu-target-bin-dir) + (c/exec :mkdir :-p kudu-target-bin-dir) + + (c/exec :rm :-rf kudu-target-sbin-dir) + (c/exec :mkdir :-p kudu-target-sbin-dir) + + ;; Copy appropriate binaries to the node. 
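+  ;; The kudu-master and kudu-tserver binaries from the local build tree
+  ;; (kudu-build-dir) go to kudu-target-sbin-dir and the kudu CLI goes to
+  ;; kudu-target-bin-dir; each uploaded file is made executable (mode 755).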
+ (when (.contains (:masters test) node) + (let [master-binary-src (path kudu-build-dir "bin" "kudu-master") + master-binary-dst (path kudu-target-sbin-dir "kudu-master")] + (c/upload master-binary-src kudu-target-sbin-dir) + (c/exec :chmod 755 master-binary-dst))) + (when (.contains (:tservers test) node) + (let [tserver-binary-src (path kudu-build-dir "bin" "kudu-tserver") + tserver-binary-dst (path kudu-target-sbin-dir "kudu-tserver")] + (c/upload tserver-binary-src kudu-target-sbin-dir) + (c/exec :chmod 755 tserver-binary-dst))) + (let [kudu-cli-binary-src (path kudu-build-dir "bin" "kudu") + kudu-cli-binary-dst (path kudu-target-bin-dir "kudu")] + (c/upload kudu-cli-binary-src kudu-target-bin-dir) + (c/exec :chmod 755 kudu-cli-binary-dst))) + + +(defn prepare-node + "Prepare Kudu node using either packaged Kudu software or + assorted Kudu binaries for the server-side components." + [test node] + (when-not (debian/installed? kudu-required-packages) + (info node "Installing required packages") + (debian/install kudu-required-packages)) + + + (if (:use-packages? test) + (prepare-node-with-pkgs test node) + (prepare-node-with-binaries test node)) + + (when (.contains (:masters test) node) + (c/exec :rm :-rf kudu-master-home-dir) + (c/exec :rm :-f kudu-master-log-file) + (c/exec :echo (str (slurp (io/resource "kudu.flags")) + "\n" (kudu-cfg-master test)) :> kudu-master-gflagfile)) + (when (.contains (:tservers test) node) + (c/exec :rm :-rf kudu-tserver-home-dir) + (c/exec :rm :-f kudu-tserver-log-file) + (c/exec :echo (str (slurp (io/resource "kudu.flags")) + "\n" (kudu-cfg-tserver test)) :> kudu-tserver-gflagfile))) + + +(defn sync-time + "When ntpd is not in synchronized state, revamps its configs and restarts + the ntpd daemon." + [test node] + (when-not (ntp-in-sync?) + (c/exec :service :ntp :stop "||" :true) + (c/exec :echo "NTPD_OPTS='-g -N'" :> "/etc/default/ntp") + (when (.contains (:masters test) node) + (c/exec :echo (ntp-server-config) :> "/etc/ntp.conf")) + (when (.contains (:tservers test) node) + (c/exec :echo (ntp-slave-config (:masters test)):> "/etc/ntp.conf")) + (c/exec :service :ntp :start) + ;; Wait for 5 minutes max for ntpd to get into synchronized state. + (c/exec :ntp-wait :-s1 :-n300))) + + +(defn start-kudu + "Start Kudu services on the node." + [test node] + (when (.contains (:masters test) node) (start-kudu-master test node)) + (when (.contains (:tservers test) node) (start-kudu-tserver test node))) diff --git a/java-scala-spark4/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj b/java-scala-spark4/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj new file mode 100644 index 0000000000..2d6eec9db9 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj @@ -0,0 +1,88 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. 
See the License for the +;; specific language governing permissions and limitations +;; under the License. + +(ns jepsen.kudu-test + (:require [clojure.test :refer :all] + [jepsen.core :as jepsen] + [jepsen.nemesis :as jn] + [jepsen.tests :as tests] + [jepsen.kudu :as kudu] + [jepsen.kudu.nemesis :as kn] + [jepsen.kudu.register :as kudu-register] + [jepsen.kudu.sets :as kudu-sets])) + +(defn check + [tcasefun opts] + (is (:valid? (:results (jepsen/run! (tcasefun opts)))))) + +(defmacro dt + [tfun tsuffix topts] + (let [tname# (symbol (str (name tfun) "-" tsuffix))] + `(clojure.test/deftest ~tname# (check ~tfun ~topts)))) + +(defn dt-func + [tfun tscenario topts] + `(dt ~tfun ~tscenario ~topts)) + +(defmacro instantiate-tests + [tfun config topts] + (let [seqtfun# (reduce (fn [out _] (conj out tfun)) [] (eval config)) + seqtscenario# (reduce (fn [out e] + (conj out (:scenario e))) [] (eval config)) + seqtopts# (reduce (fn [out e] + (conj out (merge (eval topts) + {:nemesis (:nemesis e)}))) + [] (eval config))] + `(do ~@(map dt-func seqtfun# seqtscenario# seqtopts#)))) + +;; Configurations for tests. Every configuration corresponds to running +;; a test with particular nemesis (let's call it "scenario"). +(def register-test kudu-register/register-test) +(def sets-test kudu-sets/sets-test) +(def test-configs + [ + {:scenario "noop-nemesis" + :nemesis '((fn [] jn/noop))} + {:scenario "tserver-random-halves" + :nemesis '(kn/tserver-partition-random-halves)} + {:scenario "tserver-majorities-ring" + :nemesis '(kn/tserver-partition-majorities-ring)} + {:scenario "kill-restart-2-tservers" + :nemesis '(kn/kill-restart-tserver (comp (partial take 2) shuffle))} + {:scenario "kill-restart-3-tservers" + :nemesis '(kn/kill-restart-tserver (comp (partial take 3) shuffle))} + {:scenario "kill-restart-all-tservers" + :nemesis '(kn/kill-restart-tserver shuffle)} + {:scenario "all-random-halves" + :nemesis '(jn/partition-random-halves)} + {:scenario "all-majorities-ring" + :nemesis '(jn/partition-majorities-ring)} + {:scenario "hammer-2-tservers" + :nemesis '(kn/tserver-hammer-time (comp (partial take 2) shuffle))} + {:scenario "hammer-3-tservers" + :nemesis '(kn/tserver-hammer-time (comp (partial take 3) shuffle))} + {:scenario "hammer-all-tservers" + :nemesis '(kn/tserver-hammer-time shuffle)} + ]) + +(defmacro instantiate-all-kudu-tests + [opts] + `(do + (instantiate-tests register-test test-configs ~opts) + (instantiate-tests sets-test test-configs ~opts))) + +(instantiate-all-kudu-tests {}) diff --git a/java-scala-spark4/kudu-jepsen/src/test/resources/kudu.flags b/java-scala-spark4/kudu-jepsen/src/test/resources/kudu.flags new file mode 100644 index 0000000000..f92461ab52 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/test/resources/kudu.flags @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +# This overrides all flags in a flag file. +# +# The block cache capacity is set to not exceed 1/2 of the effective +# memory pressure threshold on smaller machines: current Kudu Jepsen +# tests do not require a lot of memory of block cache capacity around. +# An alternative approach would be replacing that with the +# --force_block_cache_capacity flag. +--unlock_experimental_flags +--unlock_unsafe_flags +--block_cache_capacity_mb=128 +--logtostderr diff --git a/java-scala-spark4/kudu-jepsen/src/test/resources/ntp.conf.common b/java-scala-spark4/kudu-jepsen/src/test/resources/ntp.conf.common new file mode 100644 index 0000000000..6543eb1122 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/test/resources/ntp.conf.common @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +tinker panic 0 +enable kernel +enable ntp +enable stats +statistics loopstats peerstats clockstats sysstats +filegen loopstats file loopstats type day enable +filegen peerstats file peerstats type day enable +filegen clockstats file clockstats type day enable +filegen sysstats file sysstats type day enable +logconfig =syncall +clockall +sysall +peerall +logfile /var/log/ntpd.log +statsdir /var/log/ntpstats/ +driftfile /var/lib/ntp/ntp.drift diff --git a/java-scala-spark4/kudu-jepsen/src/test/resources/ntp.conf.server b/java-scala-spark4/kudu-jepsen/src/test/resources/ntp.conf.server new file mode 100644 index 0000000000..6657f100e4 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/test/resources/ntp.conf.server @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
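+#
+# This fragment is appended to ntp.conf.common on the Kudu master nodes. The
+# master serves time from its local clock (127.127.1.0 is the local clock
+# driver) so that tserver nodes can synchronize against the masters even when
+# no upstream NTP servers are reachable; "fudge 127.127.1.0 stratum 10" marks
+# the local clock as a low-preference time source.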
+# +enable calibrate +server 127.127.1.0 burst iburst minpoll 4 maxpoll 4 +fudge 127.127.1.0 stratum 10 diff --git a/java-scala-spark4/kudu-jepsen/src/utils/kudu_test_runner.clj b/java-scala-spark4/kudu-jepsen/src/utils/kudu_test_runner.clj new file mode 100644 index 0000000000..b43e3e6493 --- /dev/null +++ b/java-scala-spark4/kudu-jepsen/src/utils/kudu_test_runner.clj @@ -0,0 +1,115 @@ +;; Licensed to the Apache Software Foundation (ASF) under one +;; or more contributor license agreements. See the NOTICE file +;; distributed with this work for additional information +;; regarding copyright ownership. The ASF licenses this file +;; to you under the Apache License, Version 2.0 (the +;; "License"); you may not use this file except in compliance +;; with the License. You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, +;; software distributed under the License is distributed on an +;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +;; KIND, either express or implied. See the License for the +;; specific language governing permissions and limitations +;; under the License. + +;; +;; This is a starter script for maven-clojure-plugin. The script parses passed +;; command-line arguments and starts Kudu jepsen tests with appropriate +;; parameters. +;; +;; The script is invoked via calling 'mvn clojure:run'. The 'clojure:run' target +;; is used instead of 'clojure:test' because the latter does not allow to +;; pass necessary customization parameters for the tests. +;; +;; The script accepts the following command-line options: +;; --masters= +;; --tservers= +;; --ssh-key-path= +;; --iter-num= (default is 1) +;; The list of nodes/hostnames can be separated either by single space or comma. +;; + +(ns jepsen.kudu-test-runner + "Run Kudu jepsen tests via clojure-maven-plugin on 'mvn clojure:run'" + (:require [clojure.tools.logging :refer :all] + [clojure.string :as string] + [clojure.test :refer [run-tests]] + [clojure.tools.cli :refer [parse-opts]] + [jepsen.nemesis :as jn] + [jepsen.control :as jc] + [jepsen.kudu.nemesis :as kn] + [jepsen.kudu-test :refer [instantiate-all-kudu-tests]])) + +(def parse-hostnames #(string/split % #"[, ]")) + +(defn parse-path + [input] + (def trimmed (string/trim input)) + (if (= 0 (count trimmed)) nil trimmed)) + +(def cli-options + [ + [:long-opt "--masters" + :required "" + :desc "Set of Kudu master nodes" + :missing "Kudu master nodes are missing" + :parse-fn parse-hostnames] + [:long-opt "--tservers" + :required "" + :desc "Set of Kudu tserver nodes" + :missing "Kudu tserver nodes are missing" + :parse-fn parse-hostnames] + [:long-opt "--ssh-key-path" + :required "" + :desc "Path to the SSH private key to login into the Kudu nodes. + If not specified or empty, keys are retrieved from SSH agent." + :missing "Path to the SSH private key is not specified, using SSH agent." + :parse-fn parse-path] + [:long-opt "--iter-num" + :required "" + :default 1 + :desc "Number of iterations to run the test suite in cycle." + :parse-fn #(Integer/parseInt %)] + ]) + +(defn get-cmd-line-opts + [] + (let [{:keys [options arguments errors summary]} + (parse-opts *command-line-args* cli-options)] + options)) + +(do + (def cmd-line-opts (get-cmd-line-opts)) + (def test-opts (dissoc cmd-line-opts :ssh-key-path :iter-num)) + (def private-key-path (:ssh-key-path cmd-line-opts)) + (def iter-num (:iter-num cmd-line-opts)) + ;; Custom reporting for the tests. 
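+  ;; clojure.test/report is rebound below to this multimethod: the name of each
+  ;; test is printed when it starts (:begin-test-var), and every other report
+  ;; event is delegated to the original clojure.test/report implementation.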
+ (defmulti custom-report :type) + (def old-report clojure.test/report) + (defmethod custom-report :default [m] + (old-report m)) + ;; Print the name of the test upon starting it. + (defmethod custom-report :begin-test-var [m] + (println (-> m :var meta :name))) + (println "Running" iter-num "iteration(s) of the test suite") + (println "Running with ssh key:" private-key-path) + (println "Running with test options:" test-opts) + (jepsen.kudu-test/instantiate-all-kudu-tests test-opts) + (binding [jc/*strict-host-key-checking* :no + jc/*private-key-path* private-key-path + clojure.test/report custom-report] + (loop [iteration 0] + (when (< iteration iter-num) + (let [summary (run-tests 'jepsen.kudu-test-runner)] + (when-not (= 0 (:fail summary)) + (println "FAILURE: tests failed.") + (System/exit 1)) + (when-not (= 0 (:error summary)) + (println "ERROR: encountered errors while running the tests.") + (System/exit 1)) + (println "SUCCESS: all tests passed; no errors.") + (recur (inc iteration))))) + (System/exit 0))) diff --git a/java-scala-spark4/kudu-proto/build.gradle b/java-scala-spark4/kudu-proto/build.gradle new file mode 100644 index 0000000000..c32cad421f --- /dev/null +++ b/java-scala-spark4/kudu-proto/build.gradle @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +apply from: "$rootDir/gradle/protobuf.gradle" + +dependencies { + compile libs.protobufJava +} + +// Add protobuf files to the proto source set. +sourceSets { + main { + proto { + srcDir "${project.rootDir}/../src" + // Excluded any test proto files + exclude "**/*test*.proto" + } + } +} + +// kudu-proto has no public Javadoc. +javadoc { + enabled = false +} + +// Skip publishing kudu-proto artifact because it will always be shaded into kudu-client. +uploadArchives.enabled = false +install.enabled = false diff --git a/java-scala-spark4/kudu-spark-tools/build.gradle b/java-scala-spark4/kudu-spark-tools/build.gradle new file mode 100644 index 0000000000..e754ec2c5d --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/build.gradle @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +apply plugin: "scala" +apply from: "$rootDir/gradle/shadow.gradle" + +dependencies { + compile project(path: ":kudu-client", configuration: "shadow") + compile project(path: ":kudu-spark", configuration: "shadow") + // TODO(KUDU-2500): Spark uses reflection which requires the annotations at runtime. + compile libs.yetusAnnotations + compile (libs.scopt) { + // Make sure wrong Scala version is not pulled in. + exclude group: "org.scala-lang", module: "scala-library" + } + + provided libs.scalaLibrary + provided libs.sparkAvro + provided libs.sparkCore + provided libs.sparkSql + provided libs.slf4jApi + + testCompile project(path: ":kudu-test-utils", configuration: "shadow") + testCompile project(path: ":kudu-spark", configuration: "test") + testCompile libs.junit + testCompile libs.log4jApi + testCompile libs.log4jCore + testCompile libs.log4jSlf4jImpl + testCompile libs.scalatest +} + +// Adjust the artifact name to match the maven build. +archivesBaseName = "kudu-spark${versions.sparkBase}-tools_${versions.scalaBase}" diff --git a/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/BigLinkedListCommon.java b/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/BigLinkedListCommon.java new file mode 100644 index 0000000000..fd5f9133c0 --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/BigLinkedListCommon.java @@ -0,0 +1,193 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.tools; + +import java.math.BigInteger; +import java.util.Collections; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.CreateTableOptions; +import org.apache.kudu.client.PartialRow; + +/** + * Static constants, helper methods, and utility classes for BigLinkedList + * implementations. + * + * Any definitions which must be kept in-sync between ITBLL implementations + * should be kept here. 
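+ *
+ * Concretely (summarizing the column definitions below): a linked-list node is one
+ * row keyed by (key1, key2); prev1/prev2 point at the previous node, row_id is the
+ * node's position within the list written by a client, client identifies the writer,
+ * and update_count records how many times the row has been updated.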
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class BigLinkedListCommon { + + public static final String TABLE_NAME_KEY = "IntegrationTestBigLinkedList.table"; + public static final String DEFAULT_TABLE_NAME = "default.IntegrationTestBigLinkedList"; + public static final String HEADS_TABLE_NAME_KEY = "IntegrationTestBigLinkedList.heads_table"; + public static final String DEFAULT_HEADS_TABLE_NAME = "default.IntegrationTestBigLinkedListHeads"; + + /** Row key, two times 8 bytes. */ + public static final String COLUMN_KEY_ONE = "key1"; + public static final int COLUMN_KEY_ONE_IDX = 0; + public static final String COLUMN_KEY_TWO = "key2"; + public static final int COLUMN_KEY_TWO_IDX = 1; + + /** Link to the id of the prev node in the linked list, two times 8 bytes. */ + public static final String COLUMN_PREV_ONE = "prev1"; + public static final int COLUMN_PREV_ONE_IDX = 2; + public static final String COLUMN_PREV_TWO = "prev2"; + public static final int COLUMN_PREV_TWO_IDX = 3; + + /** the id of the row within the same client. */ + public static final String COLUMN_ROW_ID = "row_id"; + public static final int COLUMN_ROW_ID_IDX = 4; + + /** identifier of the mapred task that generated this row. */ + public static final String COLUMN_CLIENT = "client"; + public static final int COLUMN_CLIENT_IDX = 5; + + /** The number of times this row was updated. */ + public static final String COLUMN_UPDATE_COUNT = "update_count"; + public static final int COLUMN_UPDATE_COUNT_IDX = 6; + + public enum Counts { + + /** Nodes which are not contained in the previous pointer of any other nodes. */ + UNREFERENCED, + + /** Nodes which are referenced from another node, but do not appear in the table. */ + UNDEFINED, + + /** Nodes which have a single reference from another node. */ + REFERENCED, + + /** Nodes which have multiple references from other nodes. 
*/ + EXTRAREFERENCES, + } + + public static Schema getTableSchema() { + return new Schema(ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder(COLUMN_KEY_ONE, Type.INT64).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder(COLUMN_KEY_TWO, Type.INT64).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder(COLUMN_PREV_ONE, Type.INT64).nullable(true).build(), + new ColumnSchema.ColumnSchemaBuilder(COLUMN_PREV_TWO, Type.INT64).nullable(true).build(), + new ColumnSchema.ColumnSchemaBuilder(COLUMN_ROW_ID, Type.INT64).build(), + new ColumnSchema.ColumnSchemaBuilder(COLUMN_CLIENT, Type.STRING).build(), + new ColumnSchema.ColumnSchemaBuilder(COLUMN_UPDATE_COUNT, Type.INT32).build() + )); + } + + public static Schema getHeadsTableSchema() { + return new Schema(ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder(COLUMN_KEY_ONE, Type.INT64).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder(COLUMN_KEY_TWO, Type.INT64).key(true).build() + )); + } + + public static CreateTableOptions getCreateTableOptions(Schema schema, + int numReplicas, + int rangePartitions, + int hashPartitions) { + Preconditions.checkArgument(rangePartitions > 0); + Preconditions.checkArgument(hashPartitions > 0); + + CreateTableOptions options = new CreateTableOptions().setNumReplicas(numReplicas); + + if (rangePartitions > 1) { + options.setRangePartitionColumns(ImmutableList.of(COLUMN_KEY_ONE)); + BigInteger min = BigInteger.valueOf(Long.MIN_VALUE); + BigInteger max = BigInteger.valueOf(Long.MAX_VALUE); + BigInteger step = max.multiply(BigInteger.valueOf(2)) + .divide(BigInteger.valueOf(rangePartitions)); + + PartialRow splitRow = schema.newPartialRow(); + for (int i = 1; i < rangePartitions; i++) { + long key = min.add(step.multiply(BigInteger.valueOf(i))).longValue(); + splitRow.addLong(COLUMN_KEY_ONE_IDX, key); + options.addSplitRow(splitRow); + } + } else { + options.setRangePartitionColumns(Collections.emptyList()); + } + + if (hashPartitions > 1) { + options.addHashPartitions(ImmutableList.of(COLUMN_KEY_ONE), hashPartitions); + } + + return options; + } + + /** + * Implementation of the Xoroshiro128+ PRNG. + * Copied under the public domain from SquidLib. + */ + public static class Xoroshiro128PlusRandom { + private long state0; + private long state1; + + public Xoroshiro128PlusRandom() { + this((long) (Math.random() * Long.MAX_VALUE)); + } + + public Xoroshiro128PlusRandom(long seed) { + long state = seed + 0x9E3779B97F4A7C15L; + long z = state; + z = (z ^ (z >>> 30)) * 0xBF58476D1CE4E5B9L; + z = (z ^ (z >>> 27)) * 0x94D049BB133111EBL; + state0 = z ^ (z >>> 31); + state += state0 + 0x9E3779B97F4A7C15L; + z = state; + z = (z ^ (z >>> 30)) * 0xBF58476D1CE4E5B9L; + z = (z ^ (z >>> 27)) * 0x94D049BB133111EBL; + state1 = z ^ (z >>> 31); + } + + public long nextLong() { + final long s0 = state0; + long s1 = state1; + final long result = s0 + s1; + + s1 ^= s0; + state0 = Long.rotateLeft(s0, 55) ^ s1 ^ (s1 << 14); // a, b + state1 = Long.rotateLeft(s1, 36); // c + + return result; + } + + public void nextBytes(final byte[] bytes) { + int i = bytes.length; + int n = 0; + while (i != 0) { + n = Math.min(i, 8); + for (long bits = nextLong(); n-- != 0; bits >>>= 8) { + bytes[--i] = (byte) bits; + } + } + } + } + + /** Uninstantiable helper class. 
*/ + private BigLinkedListCommon() { + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/DistributedDataGenerator.scala b/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/DistributedDataGenerator.scala new file mode 100644 index 0000000000..294a6a7160 --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/DistributedDataGenerator.scala @@ -0,0 +1,318 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.tools + +import java.math.BigDecimal +import java.math.BigInteger +import java.nio.charset.StandardCharsets + +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.client.PartialRow +import org.apache.kudu.client.SessionConfiguration +import org.apache.kudu.spark.kudu.KuduContext +import org.apache.kudu.spark.kudu.KuduWriteOptions +import org.apache.kudu.spark.kudu.RowConverter +import org.apache.kudu.spark.kudu.SparkUtil +import org.apache.kudu.spark.tools.DistributedDataGeneratorOptions._ +import org.apache.kudu.util.DataGenerator +import org.apache.kudu.util.DateUtil +import org.apache.spark.sql.Row +import org.apache.spark.sql.SparkSession +import org.apache.spark.util.LongAccumulator +import org.apache.spark.SparkConf +import org.apache.spark.SparkContext +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import scopt.OptionParser + +import scala.collection.JavaConverters._ + +case class GeneratorMetrics(rowsWritten: LongAccumulator, collisions: LongAccumulator) + +object GeneratorMetrics { + + def apply(sc: SparkContext): GeneratorMetrics = { + GeneratorMetrics(sc.longAccumulator("rows_written"), sc.longAccumulator("row_collisions")) + } +} + +object DistributedDataGenerator { + val log: Logger = LoggerFactory.getLogger(getClass) + + def run(options: DistributedDataGeneratorOptions, ss: SparkSession): GeneratorMetrics = { + log.info(s"Running a DistributedDataGenerator with options: $options") + val sc = ss.sparkContext + val context = new KuduContext(options.masterAddresses, sc) + val metrics = GeneratorMetrics(sc) + + // Generate the Inserts. + var rdd = sc + .parallelize(0 until options.numTasks, numSlices = options.numTasks) + .mapPartitions( + { taskNumIter => + // We know there is only 1 task per partition because numSlices = options.numTasks above. + val taskNum = taskNumIter.next() + val generator = new DataGenerator.DataGeneratorBuilder() + // Add taskNum to the seed otherwise each task will try to generate the same rows. 
+ .random(new java.util.Random(options.seed + taskNum)) + .stringLength(options.stringLength) + .binaryLength(options.binaryLength) + .build() + val table = context.syncClient.openTable(options.tableName) + val schema = table.getSchema + val numRows = options.numRows / options.numTasks + val startRow: Long = numRows * taskNum + new GeneratedRowIterator(generator, options.generatorType, schema, startRow, numRows) + }, + true + ) + + if (options.repartition) { + val table = context.syncClient.openTable(options.tableName) + val sparkSchema = SparkUtil.sparkSchema(table.getSchema) + rdd = context + .repartitionRows(rdd, options.tableName, sparkSchema, KuduWriteOptions(ignoreNull = true)) + } + + // Write the rows to Kudu. + // TODO: Use context.writeRows while still tracking inserts/collisions. + rdd.foreachPartition { rows => + val kuduClient = context.syncClient + val table = kuduClient.openTable(options.tableName) + val kuduSchema = table.getSchema + val sparkSchema = SparkUtil.sparkSchema(kuduSchema) + val converter = new RowConverter(kuduSchema, sparkSchema, ignoreNull = true) + + val session = kuduClient.newSession() + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND) + + var rowsWritten = 0 + rows.foreach { row => + val insert = table.newInsert() + val partialRow = converter.toPartialRow(row) + insert.setRow(partialRow) + session.apply(insert) + rowsWritten += 1 + } + // Synchronously flush after the last record is written. + session.flush() + + // Track the collisions. + var collisions = 0 + for (error <- session.getPendingErrors.getRowErrors) { + if (error.getErrorStatus.isAlreadyPresent) { + // Because we can't check for collisions every time, but instead + // only when the rows are flushed, we subtract any rows that may + // have failed from the counter. + rowsWritten -= 1 + collisions += 1 + } else { + throw new RuntimeException("Kudu write error: " + error.getErrorStatus.toString) + } + } + metrics.rowsWritten.add(rowsWritten) + metrics.collisions.add(collisions) + session.close() + } + + metrics + } + + /** + * Entry point for testing. SparkContext is a singleton, + * so tests must create and manage their own. + */ + @InterfaceAudience.LimitedPrivate(Array("Test")) + def testMain(args: Array[String], ss: SparkSession): GeneratorMetrics = { + DistributedDataGeneratorOptions.parse(args) match { + case None => throw new IllegalArgumentException("Could not parse arguments") + case Some(config) => run(config, ss) + } + } + + def main(args: Array[String]): Unit = { + val conf = new SparkConf().setAppName("DistributedDataGenerator") + val ss = SparkSession.builder().config(conf).getOrCreate() + val metrics = testMain(args, ss) + log.info(s"Rows written: ${metrics.rowsWritten.value}") + log.info(s"Collisions: ${metrics.collisions.value}") + } +} + +private class GeneratedRowIterator( + generator: DataGenerator, + generatorType: String, + schema: Schema, + startRow: Long, + numRows: Long) + extends Iterator[Row] { + + val sparkSchema = SparkUtil.sparkSchema(schema) + // ignoreNull values so unset/defaulted rows can be passed through. 
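+            // (RowConverter comes from kudu-spark and converts the generated Kudu
+            // PartialRows into Spark SQL Rows for the schema above; see the call to
+            // toRow() in GeneratedRowIterator.next() below.)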
+ val converter = new RowConverter(schema, sparkSchema, ignoreNull = true) + + var currentRow: Long = startRow + var rowsGenerated: Long = 0 + + override def hasNext: Boolean = rowsGenerated < numRows + + override def next(): Row = { + if (rowsGenerated >= numRows) { + throw new IllegalStateException("Already generated all of the rows.") + } + + val partialRow = schema.newPartialRow() + if (generatorType == SequentialGenerator) { + setRow(partialRow, currentRow) + } else if (generatorType == RandomGenerator) { + generator.randomizeRow(partialRow) + } else { + throw new IllegalArgumentException(s"Generator type of $generatorType is unsupported") + } + currentRow += 1 + rowsGenerated += 1 + converter.toRow(partialRow) + } + + /** + * Sets all the columns in the passed row to the passed value. + * TODO(ghenke): Consider failing when value doesn't fit into the type. + */ + private def setRow(row: PartialRow, value: Long): Unit = { + val schema = row.getSchema + val columns = schema.getColumns.asScala + columns.indices.foreach { i => + val col = columns(i) + col.getType match { + case Type.BOOL => + row.addBoolean(i, value % 2 == 1) + case Type.INT8 => + row.addByte(i, value.toByte) + case Type.INT16 => + row.addShort(i, value.toShort) + case Type.INT32 => + row.addInt(i, value.toInt) + case Type.INT64 => + row.addLong(i, value) + case Type.UNIXTIME_MICROS => + row.addLong(i, value) + case Type.DATE => + row.addDate(i, DateUtil.epochDaysToSqlDate(value.toInt)) + case Type.FLOAT => + row.addFloat(i, value.toFloat) + case Type.DOUBLE => + row.addDouble(i, value.toDouble) + case Type.DECIMAL => + row.addDecimal( + i, + new BigDecimal(BigInteger.valueOf(value), col.getTypeAttributes.getScale)) + case Type.VARCHAR => + row.addVarchar(i, String.valueOf(value)) + case Type.STRING => + row.addString(i, String.valueOf(value)) + case Type.BINARY => + val bytes: Array[Byte] = String.valueOf(value).getBytes(StandardCharsets.UTF_8) + row.addBinary(i, bytes) + case _ => + throw new UnsupportedOperationException("Unsupported type " + col.getType) + } + } + } +} + +@InterfaceAudience.Private +@InterfaceStability.Unstable +case class DistributedDataGeneratorOptions( + tableName: String, + masterAddresses: String, + generatorType: String = DistributedDataGeneratorOptions.DefaultGeneratorType, + numRows: Long = DistributedDataGeneratorOptions.DefaultNumRows, + numTasks: Int = DistributedDataGeneratorOptions.DefaultNumTasks, + stringLength: Int = DistributedDataGeneratorOptions.DefaultStringLength, + binaryLength: Int = DistributedDataGeneratorOptions.DefaultStringLength, + seed: Long = System.currentTimeMillis(), + repartition: Boolean = DistributedDataGeneratorOptions.DefaultRepartition) + +@InterfaceAudience.Private +@InterfaceStability.Unstable +object DistributedDataGeneratorOptions { + val DefaultNumRows: Long = 10000 + val DefaultNumTasks: Int = 1 + val DefaultStringLength: Int = 128 + val DefaultBinaryLength: Int = 128 + val RandomGenerator: String = "random" + val SequentialGenerator: String = "sequential" + val DefaultGeneratorType: String = SequentialGenerator + val DefaultRepartition: Boolean = false + + private val parser: OptionParser[DistributedDataGeneratorOptions] = + new OptionParser[DistributedDataGeneratorOptions]("LoadRandomData") { + + arg[String]("table-name") + .action((v, o) => o.copy(tableName = v)) + .text("The table to load with random data") + + arg[String]("master-addresses") + .action((v, o) => o.copy(masterAddresses = v)) + .text("Comma-separated addresses of Kudu masters") + + 
opt[String]("type") + .action((v, o) => o.copy(generatorType = v)) + .text(s"The type of data generator. Must be one of 'random' or 'sequential'. " + + s"Default: ${DefaultGeneratorType}") + .optional() + + opt[Long]("num-rows") + .action((v, o) => o.copy(numRows = v)) + .text(s"The total number of unique rows to generate. Default: ${DefaultNumRows}") + .optional() + + opt[Int]("num-tasks") + .action((v, o) => o.copy(numTasks = v)) + .text(s"The total number of Spark tasks to use when generating data. " + + s"Default: ${DefaultNumTasks}") + .optional() + + opt[Int]("string-length") + .action((v, o) => o.copy(stringLength = v)) + .text(s"The length of generated string fields. Default: ${DefaultStringLength}") + .optional() + + opt[Int]("binary-length") + .action((v, o) => o.copy(binaryLength = v)) + .text(s"The length of generated binary fields. Default: ${DefaultBinaryLength}") + .optional() + + opt[Long]("seed") + .action((v, o) => o.copy(seed = v)) + .text(s"The seed to use in the random data generator. " + + s"Default: `System.currentTimeMillis()`") + + opt[Boolean]("repartition") + .action((v, o) => o.copy(repartition = v)) + .text(s"Repartition the data to ensure each spark task talks to a minimal " + + s"set of tablet servers.") + } + + def parse(args: Seq[String]): Option[DistributedDataGeneratorOptions] = { + parser.parse(args, DistributedDataGeneratorOptions("", "")) + } +} diff --git a/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/ImportExportFiles.scala b/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/ImportExportFiles.scala new file mode 100644 index 0000000000..0700c255e1 --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/ImportExportFiles.scala @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
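+
+// Example invocation (an illustrative sketch, not part of this patch: the jar name,
+// master address and paths are placeholders; the supported flags are listed in the
+// usage string of ImportExportKudu below):
+//
+//   spark-submit --class org.apache.kudu.spark.tools.ImportExportFiles \
+//     kudu-spark-tools.jar \
+//     --operation=import --format=csv --master-addrs=master-1:7051 \
+//     --path=hdfs:///data/input.csv --table-name=my_table --header=true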
+ +package org.apache.kudu.spark.tools + +import java.net.InetAddress + +import org.apache.kudu.client.KuduClient +import org.apache.kudu.spark.tools.ImportExportKudu.ArgsCls +import org.apache.spark.sql.SparkSession +import org.apache.spark.SparkConf +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import org.apache.kudu.spark.kudu._ +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability + +@InterfaceAudience.Public +@InterfaceStability.Unstable //TODO: Unstable due to KUDU-2454 +object ImportExportKudu { + val log: Logger = LoggerFactory.getLogger(ImportExportKudu.getClass) + + def fail(msg: String): Nothing = { + System.err.println(msg) + sys.exit(1) + } + + def defaultMasterAddrs: String = InetAddress.getLocalHost.getCanonicalHostName + + def usage: String = + s""" + | Usage: --operation=import/export --format= --master-addrs= --path= --table-name= + | where + | operation: import or export data from or to Kudu tables, default: import + | format: specify the format of data want to import/export, the following formats are supported csv,parquet,avro default:csv + | masterAddrs: comma separated addresses of Kudu master nodes, default: $defaultMasterAddrs + | path: path to input or output for import/export operation, default: file:// + | tableName: table name to import/export, default: "" + | columns: columns name for select statement on export from kudu table, default: * + | delimiter: delimiter for csv import/export, default: , + | header: header for csv import/export, default:false + """.stripMargin + + case class ArgsCls( + operation: String = "import", + format: String = "csv", + masterAddrs: String = defaultMasterAddrs, + path: String = "file://", + tableName: String = "", + columns: String = "*", + delimiter: String = ",", + header: String = "false", + inferschema: String = "false") + + object ArgsCls { + private def parseInner(options: ArgsCls, args: List[String]): ArgsCls = { + log.info(args.mkString(",")) + args match { + case Nil => options + case "--help" :: _ => + System.err.println(usage) + sys.exit(0) + case flag :: Nil => fail(s"flag $flag has no value\n$usage") + case flag :: value :: tail => + val newOptions: ArgsCls = flag match { + case "--operation" => options.copy(operation = value) + case "--format" => options.copy(format = value) + case "--master-addrs" => options.copy(masterAddrs = value) + case "--path" => options.copy(path = value) + case "--table-name" => options.copy(tableName = value) + case "--columns" => options.copy(columns = value) + case "--delimiter" => options.copy(delimiter = value) + case "--header" => options.copy(header = value) + case "--inferschema" => options.copy(inferschema = value) + case _ => fail(s"unknown argument given $flag") + } + parseInner(newOptions, tail) + } + } + + def parse(args: Array[String]): ArgsCls = { + parseInner(ArgsCls(), args.flatMap(_.split('=')).toList) + } + } +} + +object ImportExportFiles { + + import ImportExportKudu.fail + + def run(args: ArgsCls, ss: SparkSession): Unit = { + val kc = new KuduContext(args.masterAddrs, ss.sparkContext) + val sqlContext = ss.sqlContext + + val client: KuduClient = kc.syncClient + if (!client.tableExists(args.tableName)) { + fail(args.tableName + s" table doesn't exist") + } + + val kuduOptions = + Map("kudu.table" -> args.tableName, "kudu.master" -> args.masterAddrs) + + args.operation match { + case "import" => + args.format match { + case "csv" => + val df = sqlContext.read + .option("header", args.header) + 
.option("delimiter", args.delimiter) + .csv(args.path) + kc.upsertRows(df, args.tableName) + case "parquet" => + val df = sqlContext.read.parquet(args.path) + kc.upsertRows(df, args.tableName) + case "avro" => + val df = sqlContext.read + .format("com.databricks.spark.avro") + .load(args.path) + kc.upsertRows(df, args.tableName) + case _ => fail(args.format + s"unknown argument given ") + } + case "export" => + val df = sqlContext.read.options(kuduOptions).format("kudu").load.select(args.columns) + args.format match { + case "csv" => + df.write + .format("com.databricks.spark.csv") + .option("header", args.header) + .option("delimiter", args.delimiter) + .save(args.path) + case "parquet" => + df.write.parquet(args.path) + case "avro" => + df.write.format("com.databricks.spark.avro").save(args.path) + case _ => fail(args.format + s"unknown argument given ") + } + case _ => fail(args.operation + s"unknown argument given ") + } + } + + /** + * Entry point for testing. SparkContext is a singleton, + * so tests must create and manage their own. + */ + @InterfaceAudience.LimitedPrivate(Array("Test")) + def testMain(args: Array[String], ss: SparkSession): Unit = { + run(ArgsCls.parse(args), ss) + } + + def main(args: Array[String]): Unit = { + val conf = + new SparkConf().setAppName("Import or Export CSV files from/to Kudu ") + val ss = SparkSession.builder().config(conf).getOrCreate() + testMain(args, ss) + } +} diff --git a/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/IntegrationTestBigLinkedList.scala b/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/IntegrationTestBigLinkedList.scala new file mode 100644 index 0000000000..d1f3bed691 --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/IntegrationTestBigLinkedList.scala @@ -0,0 +1,481 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
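+
+// Example invocations (an illustrative sketch; the jar name and master address are
+// placeholders, and the full flag lists live in the usage strings below). With
+// --tasks=2 --lists=2 --nodes=100000 the generator writes 2 * 2 * 100000 = 400000
+// nodes, which is the count the verifier should then expect:
+//
+//   spark-submit --class org.apache.kudu.spark.tools.IntegrationTestBigLinkedList \
+//     kudu-spark-tools.jar generate --tasks=2 --lists=2 --nodes=100000 \
+//     --master-addrs=master-1:7051
+//
+//   spark-submit --class org.apache.kudu.spark.tools.IntegrationTestBigLinkedList \
+//     kudu-spark-tools.jar verify --nodes=400000 --master-addrs=master-1:7051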
+ +package org.apache.kudu.spark.tools + +import java.net.InetAddress +import java.util.Locale + +import org.apache.kudu.client.SessionConfiguration.FlushMode +import org.apache.kudu.client.KuduClient +import org.apache.kudu.client.KuduSession +import org.apache.kudu.client.KuduTable +import org.apache.kudu.spark.kudu.KuduContext +import org.apache.kudu.spark.tools.BigLinkedListCommon.Xoroshiro128PlusRandom +import org.apache.kudu.spark.tools.BigLinkedListCommon._ +import org.apache.spark.sql.SparkSession +import org.apache.spark.SparkConf +import org.apache.spark.TaskContext +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +import scala.util.Try + +/** + * Spark port of legacy IntegrationTestBigLinkedList: + * https://github.com/apache/kudu/blob/branch-1.14.x/java/kudu-spark-tools/src/main/scala/org/apache/kudu/spark/tools/IntegrationTestBigLinkedList.scala + * + * Major differences: + * * Currently, only the generator and verifier jobs are implemented. + * * The heads table is not written to during generate, and not used during verify. + * * The generate job does not write in batches. Instead, it writes a head node, + * followed by many tail nodes into the table, and then updates just the + * head node to point at the final tail node. Writes use AUTO_FLUSH_BACKGROUND. + * This is hopefully easier to understand, and has the advantage of stressing + * slightly different code paths than the MR version. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +object IntegrationTestBigLinkedList { + + val log: Logger = + LoggerFactory.getLogger(IntegrationTestBigLinkedList.getClass) + + def usage: String = + s""" + | Usage: COMMAND [COMMAND options]"); + | where COMMAND is one of: + | + | generate A Spark job that generates linked list data. + | + | verify A Spark job that verifies generated linked list data. + | Fails the job if any UNDEFINED, UNREFERENCED, or + | EXTRAREFERENCES nodes are found. Do not run at the + | same time as the Generate command. + | + | loop Loops the generate and verify jobs indefinitely. + | Data is not cleaned between runs, so each iteration + | adds more data. 
+ """.stripMargin + + def parseIntFlag(flag: String, num: String): Int = { + Try(num.toInt) + .getOrElse(fail(s"failed to parse $flag value as integer: $num")) + } + + def parseLongFlag(flag: String, num: String): Long = { + Try(num.toLong) + .getOrElse(fail(s"failed to parse $flag value as integer: $num")) + } + + def fail(msg: String): Nothing = { + System.err.println(s"FAILURE: $msg") + sys.exit(1) + } + + def nanosToHuman(n: Long): String = { + if (n > 10 * 60 * 1e9) "%s.3m".format(n / (60 * 1e9)) + else if (n > 1e9) "%s.3s".format(n / 1e9) + else if (n > 1e6) "%s.3ms".format(n / 1e6) + else if (n > 1e3) "%s.3μs".format(n / 1e3) + else s"${n}ns" + } + + def defaultMasterAddrs: String = InetAddress.getLocalHost.getCanonicalHostName + + def main(args: Array[String]): Unit = { + if (args.isEmpty) { fail(usage) } + + args(0).toLowerCase(Locale.ENGLISH) match { + case "generate" => Generator.main(args.slice(1, args.length)) + case "verify" => Verifier.main(args.slice(1, args.length)) + case "loop" => Looper.main(args.slice(1, args.length)) + case _ => fail(usage) + } + } +} + +object Generator { + import IntegrationTestBigLinkedList.log + import IntegrationTestBigLinkedList.defaultMasterAddrs + import IntegrationTestBigLinkedList.fail + import IntegrationTestBigLinkedList.nanosToHuman + import IntegrationTestBigLinkedList.parseIntFlag + + def usage: String = + s""" + | Usage: generate --tasks= --lists= --nodes= + | --hash-partitions= --range-partitions= + | --replicas= --master-addrs= --table-name= + | where + | tasks: number of Spark tasks to create, default: 1 + | lists: number of linked lists to create per task, default: 1 + | nodes: number of nodes to create per list, default: 10000000 + | hashPartitions: number of hash partitions to create for the new linked list table, if it doesn't exist, default: 1 + | rangePartitions: number of range partitions to create for the new linked list table, if it doesn't exist, default: 1 + | replicas: number of replicas to create for the new linked list table, if it doesn't exist, default: 1 + | master-addrs: comma separated addresses of Kudu master nodes, default: $defaultMasterAddrs + | table-name: the name of the linked list table, default: $DEFAULT_TABLE_NAME + """.stripMargin + + case class Args( + tasks: Int = 1, + lists: Int = 1, + nodes: Int = 10000000, + hashPartitions: Int = 1, + rangePartitions: Int = 1, + replicas: Int = 1, + masterAddrs: String = defaultMasterAddrs, + tableName: String = DEFAULT_TABLE_NAME) + + object Args { + private def parseInner(options: Args, args: List[String]): Args = { + args match { + case Nil => options + case "--help" :: _ => + System.err.println(usage) + sys.exit(0) + case flag :: Nil => fail(s"flag $flag has no value\n$usage") + case flag :: value :: tail => + val newOptions: Args = flag match { + case "--tasks" => options.copy(tasks = parseIntFlag(flag, value)) + case "--lists" => options.copy(lists = parseIntFlag(flag, value)) + case "--nodes" => options.copy(nodes = parseIntFlag(flag, value)) + case "--hash-partitions" => + options.copy(hashPartitions = parseIntFlag(flag, value)) + case "--range-partitions" => + options.copy(rangePartitions = parseIntFlag(flag, value)) + case "--replicas" => + options.copy(replicas = parseIntFlag(flag, value)) + case "--master-addrs" => options.copy(masterAddrs = value) + case "--table-name" => options.copy(tableName = value) + case _ => fail(s"unknown generate flag $flag") + } + parseInner(newOptions, tail) + } + } + + def parse(args: Array[String]): Args = { + 
parseInner(Args(), args.flatMap(_.split('=')).toList) + } + } + + def run(args: Args, ss: SparkSession): Unit = { + val kc = new KuduContext(args.masterAddrs, ss.sparkContext) + val applicationId = ss.sparkContext.applicationId + + val client: KuduClient = kc.syncClient + if (!client.tableExists(args.tableName)) { + val schema = getTableSchema + val options = + getCreateTableOptions(schema, args.replicas, args.rangePartitions, args.hashPartitions) + client.createTable(args.tableName, getTableSchema, options) + } + + // Run the generate tasks + ss.sparkContext + .makeRDD(0 until args.tasks, args.tasks) + .foreach(_ => generate(args, applicationId, kc)) + } + + def main(args: Array[String]): Unit = { + val conf = + new SparkConf().setAppName("Integration Test Big Linked List Generator") + val ss = SparkSession.builder().config(conf).getOrCreate() + run(Args.parse(args), ss) + } + + /** + * Entry point for testing. SparkContext is a singleton, + * so tests must create and manage their own. + */ + @InterfaceAudience.LimitedPrivate(Array("Test")) + def testMain(args: Array[String], ss: SparkSession): Unit = { + run(Args.parse(args), ss) + } + + def generate(args: Args, applicationId: String, kc: KuduContext): Unit = { + val taskContext = TaskContext.get() + val clientId = s"$applicationId-${taskContext.partitionId()}" + + val rand = new Xoroshiro128PlusRandom() + + val client: KuduClient = kc.syncClient + + val table: KuduTable = client.openTable(args.tableName) + val session: KuduSession = client.newSession() + session.setFlushMode(FlushMode.AUTO_FLUSH_BACKGROUND) + try { + for (_ <- 0 until args.lists) { + val start = System.nanoTime() + insertList(clientId, args, table, session, rand) + log.info( + s"$clientId inserted ${args.nodes} node linked list in {}", + nanosToHuman(System.nanoTime() - start)) + } + } finally { + session.close() + } + } + + def insertList( + clientId: String, + args: Args, + table: KuduTable, + session: KuduSession, + rand: Xoroshiro128PlusRandom): Unit = { + + // Write the head node to the table. + val headKeyOne = rand.nextLong() + val headKeyTwo = rand.nextLong() + + { + val insert = table.newInsert() + insert.getRow.addLong(COLUMN_KEY_ONE_IDX, headKeyOne) + insert.getRow.addLong(COLUMN_KEY_TWO_IDX, headKeyTwo) + insert.getRow.addLong(COLUMN_ROW_ID_IDX, 0) + insert.getRow.addString(COLUMN_CLIENT_IDX, clientId) + insert.getRow.addInt(COLUMN_UPDATE_COUNT_IDX, 0) + session.apply(insert) + } + + // Write the rest of the list nodes. + var prevKeyOne = headKeyOne + var prevKeyTwo = headKeyTwo + for (rowIdx <- 1 until args.nodes) { + val keyOne = rand.nextLong() + val keyTwo = rand.nextLong() + val insert = table.newInsert() + insert.getRow.addLong(COLUMN_KEY_ONE_IDX, keyOne) + insert.getRow.addLong(COLUMN_KEY_TWO_IDX, keyTwo) + insert.getRow.addLong(COLUMN_PREV_ONE_IDX, prevKeyOne) + insert.getRow.addLong(COLUMN_PREV_TWO_IDX, prevKeyTwo) + insert.getRow.addLong(COLUMN_ROW_ID_IDX, rowIdx) + insert.getRow.addString(COLUMN_CLIENT_IDX, clientId) + insert.getRow.addInt(COLUMN_UPDATE_COUNT_IDX, 0) + session.apply(insert) + prevKeyOne = keyOne + prevKeyTwo = keyTwo + } + + // Update the head node's previous pointers to point to the last node. 
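+    // (This closes the list: once the head's prev pointers reference the final node,
+    // every inserted node is referenced exactly once, which is the state the verifier
+    // expects to find.)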
+ { + val update = table.newUpdate() + update.getRow.addLong(COLUMN_KEY_ONE_IDX, headKeyOne) + update.getRow.addLong(COLUMN_KEY_TWO_IDX, headKeyTwo) + update.getRow.addLong(COLUMN_PREV_ONE_IDX, prevKeyOne) + update.getRow.addLong(COLUMN_PREV_TWO_IDX, prevKeyTwo) + session.apply(update) + } + + session.flush() + val errors = session.getPendingErrors + if (errors.getRowErrors.length > 0) { + throw new RuntimeException( + errors.getRowErrors + .map(_.getErrorStatus.toString) + .mkString("Row errors: [", ", ", "]")) + } + } +} + +object Verifier { + import IntegrationTestBigLinkedList.defaultMasterAddrs + import IntegrationTestBigLinkedList.fail + import IntegrationTestBigLinkedList.parseLongFlag + + def usage: String = + s""" + | Usage: verify --nodes= --master-addrs= --table-name= + | where + | nodes: number of nodes expected to be in the linked list table + | master-addrs: comma separated addresses of Kudu master nodes, default: $defaultMasterAddrs + | table-name: the name of the linked list table, default: $DEFAULT_TABLE_NAME + """.stripMargin + + case class Args( + nodes: Option[Long] = None, + masterAddrs: String = defaultMasterAddrs, + tableName: String = DEFAULT_TABLE_NAME) + + object Args { + private def parseInner(options: Args, args: List[String]): Args = { + args match { + case Nil => options + case "--help" :: _ => + System.err.println(usage) + sys.exit(0) + case flag :: Nil => fail(s"flag $flag has no value\n$usage") + case flag :: value :: tail => + val newOptions = flag match { + case "--nodes" => + options.copy(nodes = Some(parseLongFlag(flag, value))) + case "--master-addrs" => options.copy(masterAddrs = value) + case "--table-name" => options.copy(tableName = value) + case _ => fail(s"unknown verify flag $flag") + } + parseInner(newOptions, tail) + } + } + + def parse(args: Array[String]): Args = { + parseInner(Args(), args.flatMap(_.split('=')).toList) + } + } + + case class Counts(referenced: Long, unreferenced: Long, extrareferences: Long, undefined: Long) + + /** + * Verifies the expected count against the count of nodes from a verification run. + * @param expected the expected node count + * @param counts the node counts returned by the verification job + * @return an error message, if the verification fails + */ + def verify(expected: Option[Long], counts: Counts): Option[String] = { + if (expected.exists(_ != counts.referenced)) { + Some( + s"Found ${counts.referenced} referenced nodes, " + + s"which does not match the expected count of ${expected.get} nodes") + } else if (counts.unreferenced > 0) { + Some(s"Found ${counts.unreferenced} unreferenced nodes") + } else if (counts.undefined > 0) { + Some(s"Found ${counts.undefined} undefined nodes") + } else if (counts.extrareferences > 0) { + Some(s"Found ${counts.extrareferences} extra-referenced nodes") + } else None + } + + @InterfaceAudience.LimitedPrivate(Array("Test")) + def run(args: Args, ss: SparkSession): Counts = { + val sql = ss.sqlContext + + sql.read + .option("kudu.master", args.masterAddrs) + .option("kudu.table", args.tableName) + .format("kudu") + .load + .createOrReplaceTempView("nodes") + + // Get a table of all nodes and their ref count + sql.sql(s""" + | SELECT (SELECT COUNT(*) + | FROM nodes t2 + | WHERE t1.$COLUMN_KEY_ONE = t2.$COLUMN_PREV_ONE + | AND t1.$COLUMN_KEY_TWO = t2.$COLUMN_PREV_TWO) AS ref_count + | FROM nodes t1 + """.stripMargin).createOrReplaceTempView("ref_counts") + + // Compress the ref counts down to 0, 1, or 2. 
0 Indicates no references, + // 1 indicates a single reference, and 2 indicates more than 1 reference. + sql.sql(s""" + | SELECT (CASE WHEN ref_count > 1 THEN 2 ELSE ref_count END) as ref_count + | FROM ref_counts + """.stripMargin).createOrReplaceTempView("ref_counts") + + // Aggregate the ref counts + sql.sql(s""" + | SELECT ref_count, COUNT(*) as nodes + | FROM ref_counts + | GROUP BY ref_count + """.stripMargin).createOrReplaceTempView("ref_counts") + + // Transform the ref count to a state. + sql.sql(s""" + | SELECT CASE WHEN ref_count = 0 THEN "UNREFERENCED" + | WHEN ref_count = 1 THEN "REFERENCED" + | ELSE "EXTRAREFERENCES" END as state, + | nodes + | FROM ref_counts + """.stripMargin).createOrReplaceTempView("ref_counts") + + // Find all referenced but undefined nodes. + sql.sql(s""" + | SELECT $COLUMN_CLIENT as list, "UNDEFINED" as state, COUNT(*) as nodes + | FROM nodes t1 + | WHERE $COLUMN_PREV_ONE IS NOT NULL + | AND $COLUMN_PREV_TWO IS NOT NULL + | AND NOT EXISTS ( + | SELECT * FROM nodes t2 + | WHERE t1.$COLUMN_PREV_ONE = t2.$COLUMN_KEY_ONE + | AND t1.$COLUMN_PREV_TWO = t2.$COLUMN_KEY_TWO) + | GROUP BY $COLUMN_CLIENT + """.stripMargin).createOrReplaceTempView("undefined") + + // Combine the ref counts and undefined counts tables. + val rows = sql.sql(s""" + | SELECT state, nodes FROM ref_counts + | UNION ALL + | SELECT state, nodes FROM undefined + """.stripMargin).collect() + + // Extract the node counts for each state from the rows. + rows.foldLeft(Counts(0, 0, 0, 0))((counts, row) => { + val state = row.getString(0) + val count = row.getLong(1) + state match { + case "REFERENCED" => counts.copy(referenced = count) + case "UNREFERENCED" => counts.copy(unreferenced = count) + case "UNDEFINED" => counts.copy(undefined = count) + case "EXTRAREFERENCES" => counts.copy(extrareferences = count) + } + }) + } + + @InterfaceAudience.LimitedPrivate(Array("Test")) + def testMain(arguments: Array[String], ss: SparkSession): Counts = { + run(Args.parse(arguments), ss) + } + + def main(arguments: Array[String]): Unit = { + val args = Args.parse(arguments) + val conf = + new SparkConf().setAppName("Integration Test Big Linked List Generator") + val ss = SparkSession.builder().config(conf).getOrCreate() + + val counts = run(Args.parse(arguments), ss) + verify(args.nodes, counts).map(fail) + } +} + +object Looper { + import IntegrationTestBigLinkedList.log + import IntegrationTestBigLinkedList.fail + + def main(args: Array[String]): Unit = { + val conf = + new SparkConf().setAppName("Integration Test Big Linked List Looper") + val ss = SparkSession.builder().config(conf).getOrCreate() + + val genArgs = Generator.Args.parse(args) + var verifyArgs = Verifier + .Args(masterAddrs = genArgs.masterAddrs, tableName = genArgs.tableName) + val nodesPerLoop = genArgs.tasks * genArgs.lists * genArgs.nodes + + for (n <- Stream.from(1)) { + Generator.run(genArgs, ss) + val count = Verifier.run(verifyArgs, ss) + val expected = verifyArgs.nodes.map(_ + nodesPerLoop) + Verifier.verify(expected, count).map(fail) + verifyArgs = verifyArgs.copy(nodes = Some(expected.getOrElse(nodesPerLoop))) + log.info("*************************************************") + log.info(s"Completed $n loops. 
Nodes verified: ${count.referenced}") + log.info("*************************************************") + } + } +} diff --git a/java-scala-spark4/kudu-spark-tools/src/test/resources/TestImportExportFiles.csv b/java-scala-spark4/kudu-spark-tools/src/test/resources/TestImportExportFiles.csv new file mode 100644 index 0000000000..22144d2c6c --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/test/resources/TestImportExportFiles.csv @@ -0,0 +1,5 @@ +key,column1_i,column2_d,column3_s,column4_b +1,3,2.3,some string,true +2,5,4.5,some more,false +3,7,1.2,wait this is not a double bad row,true +4,9,10.1,trailing separator isn't bad mkay?,true \ No newline at end of file diff --git a/java-scala-spark4/kudu-spark-tools/src/test/resources/log4j2.properties b/java-scala-spark4/kudu-spark-tools/src/test/resources/log4j2.properties new file mode 100644 index 0000000000..22762a1560 --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/test/resources/log4j2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT + +logger.kudu.name = org.apache.kudu +logger.kudu.level = debug diff --git a/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/DistributedDataGeneratorTest.scala b/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/DistributedDataGeneratorTest.scala new file mode 100644 index 0000000000..4c7bd46814 --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/DistributedDataGeneratorTest.scala @@ -0,0 +1,145 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
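+
+// Overview (descriptive summary of the tests below): each case creates a randomly
+// generated Kudu table, runs DistributedDataGenerator.testMain with sequential or
+// random data, and asserts on the rows-written/collision accumulators and, for the
+// task-count tests, on the number of Spark tasks observed via withJobTaskCounter.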
+ +package org.apache.kudu.spark.tools + +import org.apache.kudu.Type +import org.apache.kudu.client.KuduPartitioner +import org.apache.kudu.spark.kudu.KuduTestSuite +import org.apache.kudu.test.RandomUtils +import org.apache.kudu.util.DecimalUtil +import org.apache.kudu.util.SchemaGenerator +import org.apache.kudu.spark.kudu.SparkListenerUtil.withJobTaskCounter +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.junit.Test +import org.junit.Assert.assertEquals +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +class DistributedDataGeneratorTest extends KuduTestSuite { + val log: Logger = LoggerFactory.getLogger(getClass) + + private val generator = new SchemaGenerator.SchemaGeneratorBuilder() + .random(RandomUtils.getRandom) + // These types don't have enough values to prevent collisions. + .excludeTypes(Type.BOOL, Type.INT8) + // Ensure decimals have enough values to prevent collisions. + .precisionRange(DecimalUtil.MAX_DECIMAL32_PRECISION, DecimalUtil.MAX_DECIMAL_PRECISION) + .build() + + private val randomTableName: String = "random-table" + + @Test + def testGenerateRandomData() { + val numRows = 100 + val args = Array( + s"--num-rows=$numRows", + "--num-tasks=10", + "--type=random", + randomTableName, + harness.getMasterAddressesAsString) + val (metrics, rdd) = runGeneratorTest(args) + val (rowsWritten, collisions) = (metrics.rowsWritten.value, metrics.collisions.value) + // Collisions may cause the number of rows written to be less than the number generated. + assertEquals(rowsWritten, rdd.collect.length.toLong) + assertEquals(numRows, rowsWritten + collisions) + } + + @Test + def testGenerateSequentialData() { + val numRows = 100 + val args = Array( + s"--num-rows=$numRows", + "--num-tasks=10", + "--type=sequential", + randomTableName, + harness.getMasterAddressesAsString) + val (metrics, rdd) = runGeneratorTest(args) + val (rowsWritten, collisions) = (metrics.rowsWritten.value, metrics.collisions.value) + assertEquals(numRows.toLong, rowsWritten) + assertEquals(numRows, rdd.collect.length) + assertEquals(0L, collisions) + } + + @Test + def testRepartitionData() { + val numRows = 100 + val args = Array( + s"--num-rows=$numRows", + "--num-tasks=10", + "--type=sequential", + "--repartition=true", + randomTableName, + harness.getMasterAddressesAsString) + val (metrics, rdd) = runGeneratorTest(args) + val (rowsWritten, collisions) = (metrics.rowsWritten.value, metrics.collisions.value) + assertEquals(numRows.toLong, rowsWritten) + assertEquals(numRows, rdd.collect.length) + assertEquals(0L, collisions) + } + + @Test + def testNumTasks() { + val numTasks = 8 + val numRows = 100 + val args = Array( + s"--num-rows=$numRows", + s"--num-tasks=$numTasks", + randomTableName, + harness.getMasterAddressesAsString) + + // count the number of tasks that end. + val actualNumTasks = withJobTaskCounter(ss.sparkContext) { () => + runGeneratorTest(args) + } + assertEquals(numTasks, actualNumTasks) + } + + @Test + def testNumTasksRepartition(): Unit = { + val numTasks = 8 + val numRows = 100 + val args = Array( + s"--num-rows=$numRows", + s"--num-tasks=$numTasks", + "--repartition=true", + randomTableName, + harness.getMasterAddressesAsString) + + // count the number of tasks that end. 
+ val actualNumTasks = withJobTaskCounter(ss.sparkContext) { () => + runGeneratorTest(args) + } + + val table = kuduContext.syncClient.openTable(randomTableName) + val numPartitions = new KuduPartitioner.KuduPartitionerBuilder(table).build().numPartitions() + + // We expect the number of tasks to be equal to numTasks + numPartitions because numTasks tasks + // are run to generate the data then we repartition the data to match the table partitioning + // and numPartitions tasks load the data. + assertEquals(numTasks + numPartitions, actualNumTasks) + } + + def runGeneratorTest(args: Array[String]): (GeneratorMetrics, RDD[Row]) = { + val schema = generator.randomSchema() + val options = generator.randomCreateTableOptions(schema) + kuduClient.createTable(randomTableName, schema, options) + val metrics = DistributedDataGenerator.testMain(args, ss) + (metrics, kuduContext.kuduRDD(ss.sparkContext, randomTableName)) + } +} diff --git a/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/ITBigLinkedListTest.scala b/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/ITBigLinkedListTest.scala new file mode 100644 index 0000000000..2fda7c88ed --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/ITBigLinkedListTest.scala @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.tools + +import org.apache.kudu.client.SessionConfiguration.FlushMode +import org.apache.kudu.spark.kudu.KuduTestSuite +import org.apache.kudu.spark.tools.BigLinkedListCommon._ +import org.junit.Assert._ +import org.junit.Test + +import scala.collection.JavaConverters._ + +class ITBigLinkedListTest extends KuduTestSuite { + + @Test + def testSparkITBLL() { + Generator.testMain( + Array( + "--tasks=2", + "--lists=2", + "--nodes=10000", + "--hash-partitions=2", + "--range-partitions=2", + "--replicas=1", + s"--master-addrs=${harness.getMasterAddressesAsString}" + ), + ss + ) + + // Insert bad nodes in order to test the verifier: + // + // (0, 0) points to an undefined node (-1, -1) + // (0, 1) points to (0, 0) + // (0, 2) points to (0, 0) + // + // Thus, (-1, -1) is undefined, (0, 0) is overreferenced, + // and (0, 1) and (0, 2) are unreferenced. 
+ + val table = kuduClient.openTable(DEFAULT_TABLE_NAME) + val session = kuduClient.newSession() + session.setFlushMode(FlushMode.MANUAL_FLUSH) + + for ((key1, key2, prev1, prev2) <- List((0, 0, -1, -1), (0, 1, 0, 0), (0, 2, 0, 0))) { + val insert = table.newInsert() + insert.getRow.addLong(COLUMN_KEY_ONE_IDX, key1) + insert.getRow.addLong(COLUMN_KEY_TWO_IDX, key2) + insert.getRow.addLong(COLUMN_PREV_ONE_IDX, prev1) + insert.getRow.addLong(COLUMN_PREV_TWO_IDX, prev2) + insert.getRow.addLong(COLUMN_ROW_ID_IDX, -1) + insert.getRow.addString(COLUMN_CLIENT_IDX, "bad-nodes") + insert.getRow.addInt(COLUMN_UPDATE_COUNT_IDX, 0) + session.apply(insert) + } + + for (response <- session.flush().asScala) { + if (response.hasRowError) { + // This might indicate that the generated linked lists overlapped with + // the bad nodes, but the odds are low. + throw new AssertionError(response.getRowError.getErrorStatus.toString) + } + } + + val counts = Verifier + .testMain(Array(s"--master-addrs=${harness.getMasterAddressesAsString}"), ss) + assertEquals(2 * 2 * 10000, counts.referenced) + assertEquals(1, counts.extrareferences) + assertEquals(2, counts.unreferenced) + assertEquals(1, counts.undefined) + } +} diff --git a/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/TestImportExportFiles.scala b/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/TestImportExportFiles.scala new file mode 100644 index 0000000000..7c5b5e8e9b --- /dev/null +++ b/java-scala-spark4/kudu-spark-tools/src/test/scala/org/apache/kudu/spark/tools/TestImportExportFiles.scala @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
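+
+// Overview (descriptive summary of the tests below): testCSVImport loads the bundled
+// TestImportExportFiles.csv resource into a Kudu table and checks the imported keys;
+// testRoundTrips writes sample rows, exports them to avro/csv/parquet files, re-imports
+// them into a second table, and compares the row counts.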
+ +package org.apache.kudu.spark.tools + +import java.io.File +import java.nio.file.Files +import java.nio.file.Paths + +import com.google.common.collect.ImmutableList +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.client.CreateTableOptions +import org.apache.kudu.client.KuduTable +import org.apache.kudu.spark.kudu._ +import org.junit.Assert.assertEquals +import org.junit.Before +import org.junit.Test + +import scala.collection.JavaConverters._ + +class TestImportExportFiles extends KuduTestSuite { + + private val TableDataPath = "/TestImportExportFiles.csv" + private val TableName = "TestImportExportFiles" + private val TableSchema = { + val columns = ImmutableList.of( + new ColumnSchemaBuilder("key", Type.STRING).key(true).build(), + new ColumnSchemaBuilder("column1_i", Type.STRING).build(), + new ColumnSchemaBuilder("column2_d", Type.STRING) + .nullable(true) + .build(), + new ColumnSchemaBuilder("column3_s", Type.STRING).build(), + new ColumnSchemaBuilder("column4_b", Type.STRING).build() + ) + new Schema(columns) + } + private val options = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1) + + @Before + def setUp(): Unit = { + kuduClient.createTable(TableName, TableSchema, options) + } + + @Test + def testCSVImport() { + // Get the absolute path of the resource file. + val schemaResource = + classOf[TestImportExportFiles].getResource(TableDataPath) + val dataPath = Paths.get(schemaResource.toURI).toAbsolutePath + + ImportExportFiles.testMain( + Array( + "--operation=import", + "--format=csv", + s"--master-addrs=${harness.getMasterAddressesAsString}", + s"--path=$dataPath", + s"--table-name=$TableName", + "--delimiter=,", + "--header=true", + "--inferschema=true" + ), + ss + ) + val rdd = kuduContext.kuduRDD(ss.sparkContext, TableName, List("key")) + assert(rdd.collect.length == 4) + assertEquals(rdd.collect().mkString(","), "[1],[2],[3],[4]") + } + + @Test + def testRoundTrips(): Unit = { + val table = kuduClient.openTable(TableName) + loadSampleData(table, 50) + runRoundTripTest(TableName, s"$TableName-avro", "avro") + runRoundTripTest(TableName, s"$TableName-csv", "csv") + runRoundTripTest(TableName, s"$TableName-parquet", "parquet") + } + + // TODO(KUDU-2454): Use random schemas and random data to ensure all type/values round-trip. + private def loadSampleData(table: KuduTable, numRows: Int): Unit = { + val session = kuduClient.newSession() + Range(0, numRows).map { i => + val insert = table.newInsert + val row = insert.getRow + row.addString(0, i.toString) + row.addString(1, i.toString) + row.addString(3, i.toString) + row.addString(4, i.toString) + session.apply(insert) + } + session.close + } + + private def runRoundTripTest(fromTable: String, toTable: String, format: String): Unit = { + val dir = Files.createTempDirectory("round-trip") + val path = new File(dir.toFile, s"$fromTable-$format").getAbsolutePath + + // Export the data. + ImportExportFiles.testMain( + Array( + "--operation=export", + s"--format=$format", + s"--master-addrs=${harness.getMasterAddressesAsString}", + s"--path=$path", + s"--table-name=$fromTable", + s"--header=true" + ), + ss + ) + + // Create the target table. + kuduClient.createTable(toTable, TableSchema, options) + + // Import the data. 
+ ImportExportFiles.testMain( + Array( + "--operation=import", + s"--format=$format", + s"--master-addrs=${harness.getMasterAddressesAsString}", + s"--path=$path", + s"--table-name=$toTable", + s"--header=true" + ), + ss + ) + + // Verify the tables match. + // TODO(KUDU-2454): Verify every value to ensure all values round trip. + val rdd1 = kuduContext.kuduRDD(ss.sparkContext, fromTable, List("key")) + val rdd2 = kuduContext.kuduRDD(ss.sparkContext, toTable, List("key")) + assertEquals(rdd1.count(), rdd2.count()) + } +} diff --git a/java-scala-spark4/kudu-spark/build.gradle b/java-scala-spark4/kudu-spark/build.gradle new file mode 100644 index 0000000000..abd7a4be5c --- /dev/null +++ b/java-scala-spark4/kudu-spark/build.gradle @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +apply plugin: "scala" +apply from: "$rootDir/gradle/shadow.gradle" + +dependencies { + compile project(path: ":kudu-client", configuration: "shadow") + compile libs.hdrhistogram + // TODO(KUDU-2500): Spark uses reflection which requires the annotations at runtime. + compile libs.yetusAnnotations + + provided libs.scalaLibrary + provided libs.sparkCore + provided libs.sparkSql + provided libs.slf4jApi + + testCompile project(path: ":kudu-test-utils", configuration: "shadow") + testCompile libs.junit + testCompile libs.scalatest + testCompile libs.log4jApi + testCompile libs.log4jCore + testCompile libs.log4jSlf4jImpl +} + +// Adjust the artifact name to include the spark and scala base versions. +archivesBaseName = "kudu-spark${versions.sparkBase}_${versions.scalaBase}" + +tasks.withType(com.github.spotbugs.snom.SpotBugsTask) { + // This class causes SpotBugs runtime errors, so we completely ignore it from analysis. 
+ classes = classes.filter { !it.path.contains("SparkSQLTest") } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/java-scala-spark4/kudu-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister new file mode 100644 index 0000000000..7636cf8be8 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -0,0 +1 @@ +org.apache.kudu.spark.kudu.DefaultSource \ No newline at end of file diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/DefaultSource.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/DefaultSource.scala new file mode 100644 index 0000000000..e8c3a0c649 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/DefaultSource.scala @@ -0,0 +1,564 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import org.apache.kudu.client.AsyncKuduClient.EncryptionPolicy + +import java.net.InetAddress +import java.util.Locale +import scala.collection.JavaConverters._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.sources._ +import org.apache.spark.sql.types._ +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.Row +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.SaveMode +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import org.apache.kudu.client.KuduPredicate.ComparisonOp +import org.apache.kudu.client._ +import org.apache.kudu.spark.kudu.KuduReadOptions._ +import org.apache.kudu.spark.kudu.KuduWriteOptions._ +import org.apache.kudu.spark.kudu.SparkUtil._ +import org.apache.spark.sql.execution.streaming.Sink +import org.apache.spark.sql.streaming.OutputMode + +/** + * Data source for integration with Spark's [[DataFrame]] API. + * + * Serves as a factory for [[KuduRelation]] instances for Spark. Spark will + * automatically look for a [[RelationProvider]] implementation named + * `DefaultSource` when the user specifies the path of a source during DDL + * operations through [[org.apache.spark.sql.DataFrameReader.format]]. 
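Because the provider registers the short name `kudu` and reads the `kudu.master`, `kudu.table` and `kudu.operation` keys declared below, it can be used straight from the DataFrame API. A minimal sketch, assuming an existing `SparkSession` named `spark`; the master address and table name are placeholders.

[source,scala]
----
// Read a Kudu table as a DataFrame.
val df = spark.read
  .format("kudu")
  .option("kudu.master", "master-1:7051")
  .option("kudu.table", "my_table")
  .load()

// Write it back. Only SaveMode.Append is accepted, and the write defaults
// to an upsert unless kudu.operation says otherwise.
df.write
  .format("kudu")
  .option("kudu.master", "master-1:7051")
  .option("kudu.table", "my_table")
  .option("kudu.operation", "upsert")
  .mode("append")
  .save()
----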
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class DefaultSource + extends DataSourceRegister with RelationProvider with CreatableRelationProvider + with SchemaRelationProvider with StreamSinkProvider { + + val TABLE_KEY = "kudu.table" + val KUDU_MASTER = "kudu.master" + val OPERATION = "kudu.operation" + val FAULT_TOLERANT_SCANNER = "kudu.faultTolerantScan" + val SCAN_LOCALITY = "kudu.scanLocality" + val IGNORE_NULL = "kudu.ignoreNull" + val IGNORE_DUPLICATE_ROW_ERRORS = "kudu.ignoreDuplicateRowErrors" + val REPARTITION = "kudu.repartition" + val REPARTITION_SORT = "kudu.repartition.sort" + val SCAN_REQUEST_TIMEOUT_MS = "kudu.scanRequestTimeoutMs" + val SOCKET_READ_TIMEOUT_MS = "kudu.socketReadTimeoutMs" + val BATCH_SIZE = "kudu.batchSize" + val KEEP_ALIVE_PERIOD_MS = "kudu.keepAlivePeriodMs" + val SPLIT_SIZE_BYTES = "kudu.splitSizeBytes" + val HANDLE_SCHEMA_DRIFT = "kudu.handleSchemaDrift" + val USE_DRIVER_METADATA = "kudu.useDriverMetadata" + val SNAPSHOT_TIMESTAMP_MS = "kudu.snapshotTimestampMs" + val SASL_PROTOCOL_NAME = "kudu.saslProtocolName" + val REQUIRE_AUTHENTICATION = "kudu.requireAuthentication" + val ENCRYPTION_POLICY = "kudu.encryptionPolicy" + + /** + * A nice alias for the data source so that when specifying the format + * "kudu" can be used in place of "org.apache.kudu.spark.kudu". + * Note: This class is discovered by Spark via the entry in + * `META-INF/services/org.apache.spark.sql.sources.DataSourceRegister` + */ + override def shortName(): String = "kudu" + + /** + * Construct a BaseRelation using the provided context and parameters. + * + * @param sqlContext SparkSQL context + * @param parameters parameters given to us from SparkSQL + * @return a BaseRelation Object + */ + override def createRelation( + sqlContext: SQLContext, + parameters: Map[String, String]): BaseRelation = { + createRelation(sqlContext, parameters, null) + } + + /** + * Construct a BaseRelation using the provided context, parameters and schema. + * + * @param sqlContext SparkSQL context + * @param parameters parameters given to us from SparkSQL + * @param schema the schema used to select columns for the relation + * @return a BaseRelation Object + */ + override def createRelation( + sqlContext: SQLContext, + parameters: Map[String, String], + schema: StructType): BaseRelation = { + val tableName = getTableName(parameters) + val kuduMaster = getMasterAddrs(parameters) + val operationType = getOperationType(parameters) + val saslProtocolName = getSaslProtocolName(parameters) + val requireAuthentication = getRequireAuthentication(parameters) + val encryptionPolicy = getEncryptionPolicy(parameters) + val schemaOption = Option(schema) + val readOptions = getReadOptions(parameters) + val writeOptions = getWriteOptions(parameters) + + new KuduRelation( + tableName, + kuduMaster, + saslProtocolName, + requireAuthentication, + encryptionPolicy, + operationType, + schemaOption, + readOptions, + writeOptions + )(sqlContext) + } + + /** + * Creates a relation and inserts data to specified table. + * + * @param sqlContext + * @param mode Only Append mode is supported. It will upsert or insert data + * to an existing table, depending on the upsert parameter + * @param parameters Necessary parameters for kudu.table, kudu.master, etc... 
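The `StreamSinkProvider` mixin declared on the class means the same format string also works for Structured Streaming writes, handled by `createSink` further down. A sketch, assuming an existing streaming DataFrame `streamingDF` whose columns match the target table; the master address, table name and checkpoint path are placeholders.

[source,scala]
----
val query = streamingDF.writeStream
  .format("kudu")
  .option("kudu.master", "master-1:7051")
  .option("kudu.table", "my_table")
  .option("kudu.operation", "upsert")
  .option("checkpointLocation", "/tmp/kudu-sink-checkpoint")
  .outputMode("append")
  .start()

query.awaitTermination()
----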
+ * @param data Dataframe to save into kudu + * @return returns populated base relation + */ + override def createRelation( + sqlContext: SQLContext, + mode: SaveMode, + parameters: Map[String, String], + data: DataFrame): BaseRelation = { + val kuduRelation = createRelation(sqlContext, parameters) + mode match { + case SaveMode.Append => + kuduRelation.asInstanceOf[KuduRelation].insert(data, false) + case _ => + throw new UnsupportedOperationException("Currently, only Append is supported") + } + kuduRelation + } + + override def createSink( + sqlContext: SQLContext, + parameters: Map[String, String], + partitionColumns: Seq[String], + outputMode: OutputMode): Sink = { + + val tableName = getTableName(parameters) + val masterAddrs = getMasterAddrs(parameters) + val operationType = getOperationType(parameters) + val saslProtocolName = getSaslProtocolName(parameters) + val requireAuthentication = getRequireAuthentication(parameters) + val encryptionPolicy = getEncryptionPolicy(parameters) + val readOptions = getReadOptions(parameters) + val writeOptions = getWriteOptions(parameters) + + new KuduSink( + tableName, + masterAddrs, + saslProtocolName, + requireAuthentication, + encryptionPolicy, + operationType, + readOptions, + writeOptions + )(sqlContext) + } + + private def getTableName(parameters: Map[String, String]): String = { + parameters.getOrElse( + TABLE_KEY, + throw new IllegalArgumentException( + s"Kudu table name must be specified in create options using key '$TABLE_KEY'")) + } + private def getReadOptions(parameters: Map[String, String]): KuduReadOptions = { + val batchSize = parameters.get(BATCH_SIZE).map(_.toInt).getOrElse(defaultBatchSize) + val faultTolerantScanner = + parameters.get(FAULT_TOLERANT_SCANNER).map(_.toBoolean).getOrElse(defaultFaultTolerantScanner) + val scanLocality = + parameters.get(SCAN_LOCALITY).map(getScanLocalityType).getOrElse(defaultScanLocality) + val scanRequestTimeoutMs = parameters.get(SCAN_REQUEST_TIMEOUT_MS).map(_.toLong) + val keepAlivePeriodMs = + parameters.get(KEEP_ALIVE_PERIOD_MS).map(_.toLong).getOrElse(defaultKeepAlivePeriodMs) + val splitSizeBytes = parameters.get(SPLIT_SIZE_BYTES).map(_.toLong) + val useDriverMetadata = + parameters.get(USE_DRIVER_METADATA).map(_.toBoolean).getOrElse(defaultUseDriverMetadata) + val snapshotTimestampMs = parameters.get(SNAPSHOT_TIMESTAMP_MS).map(_.toLong) + KuduReadOptions( + batchSize, + scanLocality, + faultTolerantScanner, + keepAlivePeriodMs, + scanRequestTimeoutMs, + /* socketReadTimeoutMs= */ None, + splitSizeBytes, + useDriverMetadata, + snapshotTimestampMs + ) + } + + private def getWriteOptions(parameters: Map[String, String]): KuduWriteOptions = { + val ignoreDuplicateRowErrors = + parameters + .get(IGNORE_DUPLICATE_ROW_ERRORS) + .map(_.toBoolean) + .getOrElse(defaultIgnoreDuplicateRowErrors) + val ignoreNull = + parameters.get(IGNORE_NULL).map(_.toBoolean).getOrElse(defaultIgnoreNull) + val repartition = + parameters.get(REPARTITION).map(_.toBoolean).getOrElse(defaultRepartition) + val repartitionSort = + parameters.get(REPARTITION_SORT).map(_.toBoolean).getOrElse(defaultRepartitionSort) + val handleSchemaDrift = + parameters.get(HANDLE_SCHEMA_DRIFT).map(_.toBoolean).getOrElse(defaultHandleSchemaDrift) + KuduWriteOptions( + ignoreDuplicateRowErrors, + ignoreNull, + repartition, + repartitionSort, + handleSchemaDrift) + } + + private def getMasterAddrs(parameters: Map[String, String]): String = { + parameters.getOrElse(KUDU_MASTER, InetAddress.getLocalHost.getCanonicalHostName) + } + + private def 
getSaslProtocolName(parameters: Map[String, String]): String = { + parameters.getOrElse(SASL_PROTOCOL_NAME, "kudu") + } + + private def getRequireAuthentication(parameters: Map[String, String]): Boolean = { + parameters.get(REQUIRE_AUTHENTICATION).exists(_.toBoolean) + } + + private def getEncryptionPolicy(parameters: Map[String, String]): EncryptionPolicy = { + parameters.getOrElse(ENCRYPTION_POLICY, "optional").toLowerCase(Locale.ENGLISH) match { + case "optional" => EncryptionPolicy.OPTIONAL + case "required" => EncryptionPolicy.REQUIRED + case "required_remote" => EncryptionPolicy.REQUIRED_REMOTE + } + } + + private def getScanLocalityType(opParam: String): ReplicaSelection = { + opParam.toLowerCase(Locale.ENGLISH) match { + case "leader_only" => ReplicaSelection.LEADER_ONLY + case "closest_replica" => ReplicaSelection.CLOSEST_REPLICA + case _ => + throw new IllegalArgumentException(s"Unsupported replica selection type '$opParam'") + } + } + + private def getOperationType(parameters: Map[String, String]): OperationType = { + parameters + .get(OPERATION) + .map(stringToOperationType) + .getOrElse(org.apache.kudu.spark.kudu.Upsert) + } + + private def stringToOperationType(opParam: String): OperationType = { + opParam.toLowerCase(Locale.ENGLISH) match { + case "insert" => org.apache.kudu.spark.kudu.Insert + case "insert_ignore" => + org.apache.kudu.spark.kudu.InsertIgnore + case "insert-ignore" => + org.apache.kudu.spark.kudu.InsertIgnore + case "upsert" => org.apache.kudu.spark.kudu.Upsert + case "update" => org.apache.kudu.spark.kudu.Update + case "update_ignore" => + org.apache.kudu.spark.kudu.UpdateIgnore + case "update-ignore" => + org.apache.kudu.spark.kudu.UpdateIgnore + case "delete" => org.apache.kudu.spark.kudu.Delete + case "delete_ignore" => + org.apache.kudu.spark.kudu.DeleteIgnore + case "delete-ignore" => + org.apache.kudu.spark.kudu.DeleteIgnore + case _ => + throw new IllegalArgumentException(s"Unsupported operation type '$opParam'") + } + } +} + +/** + * Implementation of Spark BaseRelation. 
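The security-related getters above (`getSaslProtocolName`, `getRequireAuthentication`, `getEncryptionPolicy`) map directly onto reader and writer options. A sketch with illustrative values; the option keys and the accepted encryption-policy strings are the ones parsed above.

[source,scala]
----
val secureDf = spark.read
  .format("kudu")
  .option("kudu.master", "master-1:7051")
  .option("kudu.table", "my_table")
  .option("kudu.saslProtocolName", "kudu")      // defaults to "kudu" when unset
  .option("kudu.requireAuthentication", "true") // defaults to false
  .option("kudu.encryptionPolicy", "required")  // "optional" | "required" | "required_remote"
  .load()
----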
+ * + * @param tableName Kudu table that we plan to read from + * @param masterAddrs Kudu master addresses + * @param operationType The default operation type to perform when writing to the relation + * @param userSchema A schema used to select columns for the relation + * @param readOptions Kudu read options + * @param writeOptions Kudu write options + * @param sqlContext SparkSQL context + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +class KuduRelation( + val tableName: String, + val masterAddrs: String, + val saslProtocolName: String, + val requireAuthentication: Boolean = false, + val encryptionPolicy: EncryptionPolicy = EncryptionPolicy.OPTIONAL, + val operationType: OperationType, + val userSchema: Option[StructType], + val readOptions: KuduReadOptions = new KuduReadOptions, + val writeOptions: KuduWriteOptions = new KuduWriteOptions)(val sqlContext: SQLContext) + extends BaseRelation with PrunedFilteredScan with InsertableRelation { + val log: Logger = LoggerFactory.getLogger(getClass) + + private val context: KuduContext = + new KuduContext( + masterAddrs, + sqlContext.sparkContext, + None, + Some(saslProtocolName), + requireAuthentication, + encryptionPolicy) + + private val table: KuduTable = context.syncClient.openTable(tableName) + + private val estimatedSize: Long = { + try { + table.getTableStatistics().getOnDiskSize + } catch { + case e: Exception => + log.warn( + "Error while getting table statistic from master, maybe the current" + + " master doesn't support the rpc, please check the version.", + e) + super.sizeInBytes + } + } + + /** + * Estimated size of this relation in bytes, this information is used by spark to + * decide whether it is safe to broadcast a relation such as in join selection. It + * is always better to overestimate this size than underestimate, because underestimation + * may lead to expensive execution plan such as broadcasting a very large table which + * will cause great network bandwidth consumption. + * TODO(KUDU-2933): Consider projection and predicates in size estimation. + * + * @return size of this relation in bytes + */ + override def sizeInBytes: Long = estimatedSize + + override def unhandledFilters(filters: Array[Filter]): Array[Filter] = + filters.filterNot(KuduRelation.supportsFilter) + + /** + * Generates a SparkSQL schema object so SparkSQL knows what is being + * provided by this BaseRelation. + * + * @return schema generated from the Kudu table's schema + */ + override def schema: StructType = { + sparkSchema(table.getSchema, userSchema.map(_.fieldNames)) + } + + /** + * Build the RDD to scan rows. + * + * @param requiredColumns columns that are being requested by the requesting query + * @param filters filters that are being applied by the requesting query + * @return RDD will all the results from Kudu + */ + override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { + val predicates = filters.flatMap(filterToPredicate) + new KuduRDD( + context, + table, + requiredColumns, + predicates, + readOptions, + sqlContext.sparkContext + ) + } + + /** + * Converts a Spark [[Filter]] to a Kudu [[KuduPredicate]]. 
+ * + * @param filter the filter to convert + * @return the converted filter + */ + private def filterToPredicate(filter: Filter): Array[KuduPredicate] = { + filter match { + case EqualTo(column, value) => + Array(comparisonPredicate(column, ComparisonOp.EQUAL, value)) + case GreaterThan(column, value) => + Array(comparisonPredicate(column, ComparisonOp.GREATER, value)) + case GreaterThanOrEqual(column, value) => + Array(comparisonPredicate(column, ComparisonOp.GREATER_EQUAL, value)) + case LessThan(column, value) => + Array(comparisonPredicate(column, ComparisonOp.LESS, value)) + case LessThanOrEqual(column, value) => + Array(comparisonPredicate(column, ComparisonOp.LESS_EQUAL, value)) + case In(column, values) => + Array(inListPredicate(column, values)) + case StringStartsWith(column, prefix) => + prefixInfimum(prefix) match { + case None => + Array(comparisonPredicate(column, ComparisonOp.GREATER_EQUAL, prefix)) + case Some(inf) => + Array( + comparisonPredicate(column, ComparisonOp.GREATER_EQUAL, prefix), + comparisonPredicate(column, ComparisonOp.LESS, inf)) + } + case IsNull(column) => Array(isNullPredicate(column)) + case IsNotNull(column) => Array(isNotNullPredicate(column)) + case And(left, right) => + filterToPredicate(left) ++ filterToPredicate(right) + case _ => Array() + } + } + + /** + * Returns the smallest string s such that, if p is a prefix of t, + * then t < s, if one exists. + * + * @param p the prefix + * @return Some(the prefix infimum), or None if none exists. + */ + private def prefixInfimum(p: String): Option[String] = { + p.reverse.dropWhile(_ == Char.MaxValue).reverse match { + case "" => None + case q => Some(q.slice(0, q.length - 1) + (q(q.length - 1) + 1).toChar) + } + } + + /** + * Creates a new comparison predicate for the column, comparison operator, and comparison value. + * + * @param column the column name + * @param operator the comparison operator + * @param value the comparison value + * @return the comparison predicate + */ + private def comparisonPredicate( + column: String, + operator: ComparisonOp, + value: Any): KuduPredicate = { + KuduPredicate.newComparisonPredicate(table.getSchema.getColumn(column), operator, value) + } + + /** + * Creates a new in list predicate for the column and values. + * + * @param column the column name + * @param values the values + * @return the in list predicate + */ + private def inListPredicate(column: String, values: Array[Any]): KuduPredicate = { + KuduPredicate.newInListPredicate(table.getSchema.getColumn(column), values.toList.asJava) + } + + /** + * Creates a new `IS NULL` predicate for the column. + * + * @param column the column name + * @return the `IS NULL` predicate + */ + private def isNullPredicate(column: String): KuduPredicate = { + KuduPredicate.newIsNullPredicate(table.getSchema.getColumn(column)) + } + + /** + * Creates a new `IS NOT NULL` predicate for the column. + * + * @param column the column name + * @return the `IS NOT NULL` predicate + */ + private def isNotNullPredicate(column: String): KuduPredicate = { + KuduPredicate.newIsNotNullPredicate(table.getSchema.getColumn(column)) + } + + /** + * Writes data into an existing Kudu table. + * + * If the `kudu.operation` parameter is set, the data will use that operation + * type. If the parameter is unset, the data will be upserted. 
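In practice this means that ordinary DataFrame filters of the supported shapes are evaluated by Kudu rather than by Spark. A sketch, continuing from a DataFrame `df` read through this source; the column names are hypothetical.

[source,scala]
----
import org.apache.spark.sql.functions.col

// Each of these becomes one or more KuduPredicates via filterToPredicate;
// unsupported shapes (for example Or) are left for Spark to evaluate.
val scanned = df
  .filter(col("key") >= 100 && col("key") < 200) // two comparison predicates (And is split)
  .filter(col("name").startsWith("ku"))          // becomes name >= "ku" AND name < "kv"
  .filter(col("city").isin("SFO", "JFK"))        // IN-list predicate
  .filter(col("nickname").isNotNull)             // IS NOT NULL predicate
----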
+ * + * @param data [[DataFrame]] to be inserted into Kudu + * @param overwrite must be false; otherwise, throws [[UnsupportedOperationException]] + */ + override def insert(data: DataFrame, overwrite: Boolean): Unit = { + if (overwrite) { + throw new UnsupportedOperationException("overwrite is not yet supported") + } + context.writeRows(data, tableName, operationType, writeOptions) + } + + /** + * Returns the string representation of this KuduRelation + * @return Kudu + tableName of the relation + */ + override def toString(): String = { + "Kudu " + this.tableName + } +} + +private[spark] object KuduRelation { + + /** + * Returns `true` if the filter is able to be pushed down to Kudu. + * + * @param filter the filter to test + */ + // formatter: off + private def supportsFilter(filter: Filter): Boolean = filter match { + case EqualTo(_, _) | GreaterThan(_, _) | GreaterThanOrEqual(_, _) | LessThan(_, _) | + LessThanOrEqual(_, _) | In(_, _) | StringStartsWith(_, _) | IsNull(_) | IsNotNull(_) => + true + case And(left, right) => supportsFilter(left) && supportsFilter(right) + case _ => false + } + // formatter: on +} + +/** + * Sinks provide at-least-once semantics by retrying failed batches, + * and provide a `batchId` interface to implement exactly-once-semantics. + * Since Kudu does not internally track batch IDs, this is ignored, + * and it is up to the user to specify an appropriate `operationType` to achieve + * the desired semantics when adding batches. + * + * The default `Upsert` allows for KuduSink to handle duplicate data and such retries. + * + * Insert ignore support (KUDU-1563) would be useful, but while that doesn't exist, + * using Upsert will work. Delete ignore would also be useful. + */ +class KuduSink( + val tableName: String, + val masterAddrs: String, + val saslProtocolName: String, + val requireAuthentication: Boolean = false, + val encryptionPolicy: EncryptionPolicy = EncryptionPolicy.OPTIONAL, + val operationType: OperationType, + val readOptions: KuduReadOptions = new KuduReadOptions, + val writeOptions: KuduWriteOptions)(val sqlContext: SQLContext) + extends Sink { + + private val context: KuduContext = + new KuduContext(masterAddrs, sqlContext.sparkContext, None, Some(saslProtocolName)) + + override def addBatch(batchId: Long, data: DataFrame): Unit = { + context.writeRows(data, tableName, operationType, writeOptions) + } +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/HdrHistogramAccumulator.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/HdrHistogramAccumulator.scala new file mode 100644 index 0000000000..c775930c26 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/HdrHistogramAccumulator.scala @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import scala.collection.JavaConverters._ + +import org.apache.spark.util.AccumulatorV2 +import org.HdrHistogram.IntCountsHistogram + +/* + * A Spark accumulator that aggregates values into an HDR histogram. + * + * This class is a wrapper for a wrapper around an HdrHistogram[1]. The purpose + * of the double-wrapping is to work around how Spark displays accumulators in + * its web UI. Accumulators are displayed using AccumulatorV2#value's toString + * and not the toString method of the AccumulatorV2 (see [2]). So, to provide + * a useful display for the histogram on the web UI, we wrap the HdrHistogram + * in a wrapper class, implement toString on the wrapper class, and make the + * wrapper class the value class of the Accumulator. + * + * [1]: https://github.com/HdrHistogram/HdrHistogram + * [2]: https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala#L216 + */ +private[kudu] class HdrHistogramAccumulator(histogram: HistogramWrapper = new HistogramWrapper()) + extends AccumulatorV2[Int, HistogramWrapper] { + + override def isZero: Boolean = { + histogram.isZero + } + + override def copy(): AccumulatorV2[Int, HistogramWrapper] = { + new HdrHistogramAccumulator(histogram.copy()) + } + + override def reset(): Unit = { + histogram.reset() + } + + override def add(v: Int): Unit = { + histogram.add(v) + } + + override def merge(other: AccumulatorV2[Int, HistogramWrapper]): Unit = { + histogram.add(other.value) + } + + override def value: HistogramWrapper = histogram + + override def toString: String = histogram.toString +} + +/* + * A wrapper for a IntCountsHistogram from the HdrHistogram library. See the + * comment on the declaration of the HdrHistogramAccumulator for why this class + * exists. + * + * synchronized is used because accumulators may be read from multiple threads concurrently. + * + * An option is used for innerHistogram so we can only initialize the histogram if it is used. + */ +private[kudu] class HistogramWrapper(var innerHistogram: Option[IntCountsHistogram] = None) + extends Serializable { + + def isZero: Boolean = { + innerHistogram.synchronized { + innerHistogram.isEmpty + } + } + + def copy(): HistogramWrapper = { + innerHistogram.synchronized { + new HistogramWrapper(innerHistogram.map(_.copy())) + } + } + + def reset(): Unit = { + innerHistogram.synchronized { + if (innerHistogram.isDefined) { + innerHistogram.get.reset() + } + innerHistogram = None + } + } + + def add(v: Int) { + innerHistogram.synchronized { + initializeIfEmpty() + innerHistogram.get.recordValue(v) + } + } + + def add(other: HistogramWrapper) { + innerHistogram.synchronized { + if (other.innerHistogram.isEmpty) { + return + } + initializeIfEmpty() + innerHistogram.get.add(other.innerHistogram.get) + } + } + + private def initializeIfEmpty(): Unit = { + if (innerHistogram.isEmpty) { + innerHistogram = Some(new IntCountsHistogram(2)) + } + } + + override def toString: String = { + innerHistogram.synchronized { + if (innerHistogram.isEmpty) { + return "0ms" + } + + if (innerHistogram.get.getTotalCount == 1) { + return s"${innerHistogram.get.getMinValue}ms" + } + // The argument to SynchronizedHistogram#percentiles is the number of + // ticks per half distance to 100%. So, a value of 1 produces values for + // the percentiles 50, 75, 87.5, ~95, ~97.5, etc., until all histogram + // values have been exhausted. 
It's a little wonky if there are very few + // values in the histogram-- it might print out the same percentile a + // couple of times- but it's really nice for larger histograms. + innerHistogram.get + .percentiles(1) + .asScala + .map { pv => + s"${pv.getPercentile}%: ${pv.getValueIteratedTo}ms" + } + .mkString(", ") + } + } +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduContext.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduContext.scala new file mode 100644 index 0000000000..68c3dc7114 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduContext.scala @@ -0,0 +1,660 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import java.security.AccessController +import java.security.PrivilegedAction +import javax.security.auth.Subject +import javax.security.auth.login.AppConfigurationEntry +import javax.security.auth.login.Configuration +import javax.security.auth.login.LoginContext +import scala.collection.JavaConverters._ +import scala.collection.mutable +import org.apache.hadoop.util.ShutdownHookManager +import org.apache.spark.Partitioner +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.Row +import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.AccumulatorV2 +import org.apache.spark.util.CollectionAccumulator +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import org.apache.kudu.client.SessionConfiguration.FlushMode +import org.apache.kudu.client._ +import org.apache.kudu.spark.kudu.SparkUtil.kuduSchema +import org.apache.kudu.spark.kudu.SparkUtil._ +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.client.AsyncKuduClient.EncryptionPolicy +import org.apache.spark.unsafe.types.ByteArray + +/** + * KuduContext is a serializable container for Kudu client connections. + * + * If a Kudu client connection is needed as part of a Spark application, a + * [[KuduContext]] should be created in the driver, and shared with executors + * as a serializable field. 
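A minimal sketch of that pattern, assuming an existing `SparkSession` named `spark`; the master address and table name are placeholders, and `newRowsDF`, `changedRowsDF` and `keyOnlyDF` stand in for DataFrames whose schemas match the table.

[source,scala]
----
import org.apache.kudu.spark.kudu.KuduContext

val kuduContext = new KuduContext("master-1:7051", spark.sparkContext)

if (kuduContext.tableExists("example_table")) {
  // Each write helper takes a DataFrame, a table name and optional KuduWriteOptions.
  kuduContext.insertRows(newRowsDF, "example_table")
  kuduContext.upsertRows(changedRowsDF, "example_table")
  kuduContext.deleteRows(keyOnlyDF, "example_table") // only the key columns are needed
}
----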
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +@SerialVersionUID(1L) +class KuduContext( + val kuduMaster: String, + sc: SparkContext, + val socketReadTimeoutMs: Option[Long], + val saslProtocolName: Option[String] = None, + val requireAuthentication: Boolean = false, + val encryptionPolicy: EncryptionPolicy = EncryptionPolicy.OPTIONAL) + extends Serializable { + val log: Logger = LoggerFactory.getLogger(getClass) + + def this(kuduMaster: String, sc: SparkContext) = this(kuduMaster, sc, None) + + // An accumulator that collects all the rows written to Kudu for testing only. + // Enabled by setting captureRows = true. + private[kudu] var captureRows = false + private[kudu] var rowsAccumulator: CollectionAccumulator[Row] = + sc.collectionAccumulator[Row]("kudu.rows") + + /** + * A collection of accumulator metrics describing the usage of a KuduContext. + */ + private[kudu] val numInserts: MapAccumulator[String, Long] = + new MapAccumulator[String, Long](Math.addExact) + private[kudu] val numUpserts: MapAccumulator[String, Long] = + new MapAccumulator[String, Long](Math.addExact) + private[kudu] val numUpdates: MapAccumulator[String, Long] = + new MapAccumulator[String, Long](Math.addExact) + private[kudu] val numDeletes: MapAccumulator[String, Long] = + new MapAccumulator[String, Long](Math.addExact) + sc.register(numInserts, "kudu.num_inserts") + sc.register(numUpserts, "kudu.num_upserts") + sc.register(numUpdates, "kudu.num_updates") + sc.register(numDeletes, "kudu.num_deletes") + + // Increments the appropriate metric given an OperationType and a count per table. + private def addForOperation(count: Long, opType: OperationType, tableName: String): Unit = { + opType match { + case org.apache.kudu.spark.kudu.Insert => numInserts.add((tableName, count)) + case org.apache.kudu.spark.kudu.InsertIgnore => numInserts.add((tableName, count)) + case org.apache.kudu.spark.kudu.Upsert => numUpserts.add((tableName, count)) + case org.apache.kudu.spark.kudu.Update => numUpdates.add((tableName, count)) + case org.apache.kudu.spark.kudu.UpdateIgnore => numUpdates.add((tableName, count)) + case org.apache.kudu.spark.kudu.Delete => numDeletes.add((tableName, count)) + case org.apache.kudu.spark.kudu.DeleteIgnore => numDeletes.add((tableName, count)) + } + } + + /** + * TimestampAccumulator accumulates the maximum value of client's + * propagated timestamp of all executors and can only read by the + * driver. + */ + private[kudu] class TimestampAccumulator(var timestamp: Long = 0L) + extends AccumulatorV2[Long, Long] { + override def isZero: Boolean = { + timestamp == 0 + } + + override def copy(): AccumulatorV2[Long, Long] = { + new TimestampAccumulator(timestamp) + } + + override def reset(): Unit = { + timestamp = 0L + } + + override def add(v: Long): Unit = { + timestamp = timestamp.max(v) + } + + override def merge(other: AccumulatorV2[Long, Long]): Unit = { + timestamp = timestamp.max(other.value) + + // Since for every write/scan operation, each executor holds its own copy of + // client. We need to update the propagated timestamp on the driver based on + // the latest propagated timestamp from all executors through TimestampAccumulator. 
+ syncClient.updateLastPropagatedTimestamp(timestampAccumulator.value) + } + + override def value: Long = timestamp + } + + val timestampAccumulator = new TimestampAccumulator() + sc.register(timestampAccumulator) + + val durationHistogram = new HdrHistogramAccumulator() + sc.register(durationHistogram, "kudu.write_duration") + + @deprecated("Use KuduContext constructor", "1.4.0") + def this(kuduMaster: String) { + this(kuduMaster, new SparkContext()) + } + + @transient lazy val syncClient: KuduClient = asyncClient.syncClient() + + @transient lazy val asyncClient: AsyncKuduClient = { + val c = KuduClientCache + .getAsyncClient(kuduMaster, saslProtocolName, requireAuthentication, encryptionPolicy) + if (authnCredentials != null) { + c.importAuthenticationCredentials(authnCredentials) + } + c + } + + @transient lazy val supportsIgnoreOperations: Boolean = syncClient.supportsIgnoreOperations() + + // Visible for testing. + private[kudu] val authnCredentials: Array[Byte] = { + Subject.doAs(KuduContext.getSubject(sc), new PrivilegedAction[Array[Byte]] { + override def run(): Array[Byte] = + syncClient.exportAuthenticationCredentials() + }) + } + + /** + * Create an RDD from a Kudu table. + * + * @param tableName table to read from + * @param columnProjection list of columns to read. Not specifying this at all + * (i.e. setting to null) or setting to the special + * string '*' means to project all columns + * @return a new RDD that maps over the given table for the selected columns + */ + def kuduRDD( + sc: SparkContext, + tableName: String, + columnProjection: Seq[String] = Nil, + options: KuduReadOptions = KuduReadOptions()): RDD[Row] = { + new KuduRDD( + this, + syncClient.openTable(tableName), + columnProjection.toArray, + Array(), + options, + sc) + } + + /** + * Check if kudu table already exists + * + * @param tableName name of table to check + * @return true if table exists, false if table does not exist + */ + def tableExists(tableName: String): Boolean = + syncClient.tableExists(tableName) + + /** + * Delete kudu table + * + * @param tableName name of table to delete + * @return DeleteTableResponse + */ + def deleteTable(tableName: String): DeleteTableResponse = + syncClient.deleteTable(tableName) + + /** + * Creates a kudu table for the given schema. Partitioning can be specified through options. + * + * @param tableName table to create + * @param schema struct schema of table + * @param keys primary keys of the table + * @param options replication and partitioning options for the table + * @return the KuduTable that was created + */ + def createTable( + tableName: String, + schema: StructType, + keys: Seq[String], + options: CreateTableOptions): KuduTable = { + val kuduSchema = createSchema(schema, keys) + createTable(tableName, kuduSchema, options) + } + + /** + * Creates a kudu table for the given schema. Partitioning can be specified through options. + * + * @param tableName table to create + * @param schema schema of table + * @param options replication and partitioning options for the table + * @return the KuduTable that was created + */ + def createTable(tableName: String, schema: Schema, options: CreateTableOptions): KuduTable = { + syncClient.createTable(tableName, schema, options) + } + + /** + * Creates a kudu schema for the given struct schema. 
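A sketch of the table-management and read helpers defined above, with a hypothetical schema and table name.

[source,scala]
----
import scala.collection.JavaConverters._

import org.apache.kudu.client.CreateTableOptions
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

val schema = StructType(Seq(
  StructField("key", LongType, nullable = false),
  StructField("value", StringType, nullable = true)))

// createTable converts the Spark schema to a Kudu schema, using `keys` as the primary key.
kuduContext.createTable(
  "example_table",
  schema,
  Seq("key"),
  new CreateTableOptions()
    .setRangePartitionColumns(List("key").asJava)
    .setNumReplicas(1))

// Read the table back as an RDD[Row], projecting only the key column.
val keysRdd = kuduContext.kuduRDD(spark.sparkContext, "example_table", Seq("key"))
----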
+ * + * @param schema struct schema of table + * @param keys primary keys of the table + * @return the Kudu schema + */ + def createSchema(schema: StructType, keys: Seq[String]): Schema = { + kuduSchema(schema, keys) + } + + /** Map Spark SQL type to Kudu type */ + def kuduType(dt: DataType): Type = { + sparkTypeToKuduType(dt) + } + + /** + * Inserts the rows of a [[DataFrame]] into a Kudu table. + * + * @param data the data to insert + * @param tableName the Kudu table to insert into + * @param writeOptions the Kudu write options + */ + def insertRows( + data: DataFrame, + tableName: String, + writeOptions: KuduWriteOptions = new KuduWriteOptions): Unit = { + log.info(s"inserting into table '$tableName'") + writeRows(data, tableName, org.apache.kudu.spark.kudu.Insert, writeOptions) + log.info(s"inserted ${numInserts.value.get(tableName)} rows into table '$tableName'") + } + + /** + * Inserts the rows of a [[DataFrame]] into a Kudu table, ignoring any new + * rows that have a primary key conflict with existing rows. + * + * This function call is equivalent to the following, which is preferred: + * {{{ + * insertRows(data, tableName, new KuduWriteOptions(ignoreDuplicateRowErrors = true)) + * }}} + * + * @param data the data to insert into Kudu + * @param tableName the Kudu table to insert into + */ + @deprecated( + "Use KuduContext.insertRows(data, tableName, new KuduWriteOptions(ignoreDuplicateRowErrors = true))", + "1.8.0") + def insertIgnoreRows(data: DataFrame, tableName: String): Unit = { + val writeOptions = KuduWriteOptions(ignoreDuplicateRowErrors = true) + log.info(s"inserting into table '$tableName'") + writeRows(data, tableName, org.apache.kudu.spark.kudu.Insert, writeOptions) + log.info(s"inserted ${numInserts.value.get(tableName)} rows into table '$tableName'") + } + + /** + * Upserts the rows of a [[DataFrame]] into a Kudu table. + * + * @param data the data to upsert into Kudu + * @param tableName the Kudu table to upsert into + * @param writeOptions the Kudu write options + */ + def upsertRows( + data: DataFrame, + tableName: String, + writeOptions: KuduWriteOptions = new KuduWriteOptions): Unit = { + log.info(s"upserting into table '$tableName'") + writeRows(data, tableName, org.apache.kudu.spark.kudu.Upsert, writeOptions) + log.info(s"upserted ${numUpserts.value.get(tableName)} rows into table '$tableName'") + } + + /** + * Updates a Kudu table with the rows of a [[DataFrame]]. + * + * @param data the data to update into Kudu + * @param tableName the Kudu table to update + * @param writeOptions the Kudu write options + */ + def updateRows( + data: DataFrame, + tableName: String, + writeOptions: KuduWriteOptions = new KuduWriteOptions): Unit = { + log.info(s"updating rows in table '$tableName'") + writeRows(data, tableName, org.apache.kudu.spark.kudu.Update, writeOptions) + log.info(s"updated ${numUpdates.value.get(tableName)} rows in table '$tableName'") + } + + /** + * Deletes the rows of a [[DataFrame]] from a Kudu table, ignoring any none-existing + * rows. 
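The write helpers above, and `writeRows` below, all accept a `KuduWriteOptions`. A sketch with illustrative settings; the field names are the ones referenced elsewhere in this file, and `changedRowsDF` is a placeholder DataFrame.

[source,scala]
----
import org.apache.kudu.spark.kudu.KuduWriteOptions

val writeOptions = KuduWriteOptions(
  ignoreNull = true,        // skip null DataFrame values instead of writing NULLs
  repartition = true,       // shuffle rows to line up with Kudu's partitioning
  repartitionSort = true,   // sort by encoded primary key within each partition
  handleSchemaDrift = true) // add unknown columns to the table as nullable before writing

kuduContext.upsertRows(changedRowsDF, "example_table", writeOptions)
----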
+ * + * @param data the data to delete from Kudu + * note that only the key columns should be specified for deletes + * @param tableName The Kudu tabe to delete from + * @param writeOptions the Kudu write options + */ + def deleteIgnoreRows( + data: DataFrame, + tableName: String, + writeOptions: KuduWriteOptions = new KuduWriteOptions): Unit = { + log.info(s"deleting rows from table '$tableName'") + writeRows(data, tableName, org.apache.kudu.spark.kudu.DeleteIgnore, writeOptions) + log.info( + s"deleted up to ${numDeletes.value.get(tableName)} rows from table '$tableName' using DELETE_IGNORE") + } + + /** + * Deletes the rows of a [[DataFrame]] from a Kudu table. + * + * @param data the data to delete from Kudu + * note that only the key columns should be specified for deletes + * @param tableName The Kudu tabe to delete from + * @param writeOptions the Kudu write options + */ + def deleteRows( + data: DataFrame, + tableName: String, + writeOptions: KuduWriteOptions = new KuduWriteOptions): Unit = { + log.info(s"deleting rows from table '$tableName'") + writeRows(data, tableName, org.apache.kudu.spark.kudu.Delete, writeOptions) + log.info(s"deleted ${numDeletes.value.get(tableName)} rows from table '$tableName'") + } + + private[kudu] def writeRows( + data: DataFrame, + tableName: String, + operation: OperationType, + writeOptions: KuduWriteOptions = new KuduWriteOptions) { + val schema = data.schema + // Get the client's last propagated timestamp on the driver. + val lastPropagatedTimestamp = syncClient.getLastPropagatedTimestamp + + if (writeOptions.handleSchemaDrift) { + val kuduSchema = syncClient.openTable(tableName).getSchema + val newColumns = schema.fields.filter(f => !kuduSchema.hasColumn(f.name)) + if (!newColumns.isEmpty) { + log.info( + s"adding ${newColumns.length} columns to table '$tableName' to handle schema drift") + val alter = new AlterTableOptions() + newColumns.foreach { col => + alter.addNullableColumn(col.name, sparkTypeToKuduType(col.dataType)) + } + try { + syncClient.alterTable(tableName, alter) + } catch { + case e: KuduException => + // Ignore the exception if the column already exists due to concurrent + // applications attempting to handle schema drift. + if (e.getStatus.isAlreadyPresent) { + log.info(s"column already exists in table '$tableName' while handling schema drift") + } else { + throw e + } + } + } + } + + // Convert to an RDD and map the InternalRows to Rows. + // This avoids any corruption as reported in SPARK-26880. + var rdd = data.queryExecution.toRdd.mapPartitions { rows => + val table = syncClient.openTable(tableName) + val converter = new RowConverter(table.getSchema, schema, writeOptions.ignoreNull) + rows.map(converter.toRow) + } + + if (writeOptions.repartition) { + rdd = repartitionRows(rdd, tableName, schema, writeOptions) + } + + // If the cluster doesn't support the INSERT_IGNORE operation fallback to the old + // session based style. + var adjustedOperation = operation + var adjustedWriteOptions = writeOptions + if (operation == org.apache.kudu.spark.kudu.InsertIgnore && !supportsIgnoreOperations) { + adjustedOperation = org.apache.kudu.spark.kudu.Insert; + adjustedWriteOptions = writeOptions.copy(ignoreDuplicateRowErrors = true); + } + + // Write the rows for each Spark partition. 
+ rdd.foreachPartition(iterator => { + val pendingErrors = writePartitionRows( + iterator, + schema, + tableName, + adjustedOperation, + lastPropagatedTimestamp, + adjustedWriteOptions) + if (pendingErrors.getRowErrors.nonEmpty) { + val errors = pendingErrors.getRowErrors + val sample = errors.take(5).map(_.getErrorStatus).mkString + if (pendingErrors.isOverflowed) { + throw new RuntimeException( + s"PendingErrors overflowed. Failed to write at least ${errors.length} rows " + + s"to Kudu; Sample errors: $sample") + } else { + throw new RuntimeException( + s"Failed to write ${errors.length} rows to Kudu; Sample errors: $sample") + } + } + }) + log.info(s"completed $operation ops: duration histogram: $durationHistogram") + } + + private[spark] def repartitionRows( + rdd: RDD[Row], + tableName: String, + schema: StructType, + writeOptions: KuduWriteOptions): RDD[Row] = { + val partitionCount = getPartitionCount(tableName) + val sparkPartitioner = new Partitioner { + override def numPartitions: Int = partitionCount + override def getPartition(key: Any): Int = { + key.asInstanceOf[(Int, Row)]._1 + } + } + + // Key the rows by the Kudu partition index using the KuduPartitioner and the + // table's primary key. This allows us to re-partition and sort the columns. + val keyedRdd = rdd.mapPartitions { rows => + val table = syncClient.openTable(tableName) + val converter = new RowConverter(table.getSchema, schema, writeOptions.ignoreNull) + val partitioner = new KuduPartitioner.KuduPartitionerBuilder(table).build() + rows.map { row => + val partialRow = converter.toPartialRow(row) + val partitionIndex = partitioner.partitionRow(partialRow) + ((partitionIndex, partialRow.encodePrimaryKey()), row) + } + } + + // Define an implicit Ordering trait for the encoded primary key + // to enable rdd sorting functions below. + implicit val byteArrayOrdering: Ordering[Array[Byte]] = new Ordering[Array[Byte]] { + def compare(x: Array[Byte], y: Array[Byte]): Int = { + ByteArray.compareBinary(x, y) + } + } + + // Partition the rows by the Kudu partition index to ensure the Spark partitions + // match the Kudu partitions. This will make the number of Spark tasks match the number + // of Kudu partitions. Optionally sort while repartitioning. + // TODO: At some point we may want to support more or less tasks while still partitioning. + val shuffledRDD = if (writeOptions.repartitionSort) { + keyedRdd.repartitionAndSortWithinPartitions(sparkPartitioner) + } else { + keyedRdd.partitionBy(sparkPartitioner) + } + // Drop the partitioning key. + shuffledRDD.map { case (_, row) => row } + } + + private def writePartitionRows( + rows: Iterator[Row], + schema: StructType, + tableName: String, + opType: OperationType, + lastPropagatedTimestamp: Long, + writeOptions: KuduWriteOptions): RowErrorsAndOverflowStatus = { + // Since each executor has its own KuduClient, update executor's propagated timestamp + // based on the last one on the driver. + syncClient.updateLastPropagatedTimestamp(lastPropagatedTimestamp) + val table = syncClient.openTable(tableName) + val rowConverter = new RowConverter(table.getSchema, schema, writeOptions.ignoreNull) + val session: KuduSession = syncClient.newSession + session.setFlushMode(FlushMode.AUTO_FLUSH_BACKGROUND) + if (writeOptions.ignoreDuplicateRowErrors) { + log.warn( + "kudu.ignoreDuplicateRowErrors is deprecated and slow. 
Use the insert_ignore operation instead.") + } + session.setIgnoreAllDuplicateRows(writeOptions.ignoreDuplicateRowErrors) + var numRows = 0 + log.info(s"applying operations of type '${opType.toString}' to table '$tableName'") + val startTime = System.currentTimeMillis() + try { + for (row <- rows) { + if (captureRows) { + rowsAccumulator.add(row) + } + val partialRow = rowConverter.toPartialRow(row) + val operation = opType.operation(table) + operation.setRow(partialRow) + session.apply(operation) + numRows += 1 + } + } finally { + session.close() + // Update timestampAccumulator with the client's last propagated + // timestamp on each executor. + timestampAccumulator.add(syncClient.getLastPropagatedTimestamp) + addForOperation(numRows, opType, tableName) + val elapsedTime = (System.currentTimeMillis() - startTime).toInt + durationHistogram.add(elapsedTime) + log.info(s"applied $numRows ${opType}s to table '$tableName' in ${elapsedTime}ms") + } + session.getPendingErrors + } + + private def getPartitionCount(tableName: String): Int = { + val table = syncClient.openTable(tableName) + val partitioner = new KuduPartitioner.KuduPartitionerBuilder(table).build() + partitioner.numPartitions() + } +} + +private object KuduContext { + val log: Logger = LoggerFactory.getLogger(classOf[KuduContext]) + + /** + * Returns a new Kerberos-authenticated [[Subject]] if the Spark context contains + * principal and keytab options, otherwise returns the currently active subject. + * + * The keytab and principal options should be set when deploying a Spark + * application in cluster mode with Yarn against a secure Kudu cluster. Spark + * internally will grab HDFS and HBase delegation tokens (see + * [[org.apache.spark.deploy.SparkSubmit]]), so we do something similar. + * + * This method can only be called on the driver, where the SparkContext is + * available. + * + * @return A Kerberos-authenticated subject if the Spark context contains + * principal and keytab options, otherwise returns the currently + * active subject + */ + private def getSubject(sc: SparkContext): Subject = { + val subject = Subject.getSubject(AccessController.getContext) + + val principal = + sc.getConf.getOption("spark.yarn.principal").getOrElse(return subject) + val keytab = + sc.getConf.getOption("spark.yarn.keytab").getOrElse(return subject) + + log.info(s"Logging in as principal $principal with keytab $keytab") + + val conf = new Configuration { + override def getAppConfigurationEntry(name: String): Array[AppConfigurationEntry] = { + val options = Map( + "principal" -> principal, + "keyTab" -> keytab, + "useKeyTab" -> "true", + "useTicketCache" -> "false", + "doNotPrompt" -> "true", + "refreshKrb5Config" -> "true" + ) + + Array( + new AppConfigurationEntry( + "com.sun.security.auth.module.Krb5LoginModule", + AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, + options.asJava)) + } + } + + val loginContext = new LoginContext("kudu-spark", new Subject(), null, conf) + loginContext.login() + loginContext.getSubject + } +} + +private object KuduClientCache { + val log: Logger = LoggerFactory.getLogger(KuduClientCache.getClass) + + private case class CacheValue(kuduClient: AsyncKuduClient, shutdownHookHandle: Runnable) + + /** + * Set to + * [[org.apache.spark.util.ShutdownHookManager.DEFAULT_SHUTDOWN_PRIORITY]]. + * The client instances are closed through the JVM shutdown hook + * mechanism in order to make sure that any unflushed writes are cleaned up + * properly. Spark has no shutdown notifications. 
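For the Kerberos path in `getSubject` above, the principal and keytab are taken straight from the Spark configuration. A sketch of the relevant settings; the principal and keytab path are placeholders.

[source,scala]
----
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .setAppName("kudu-spark-job")
  .set("spark.yarn.principal", "kudu-user@EXAMPLE.COM")
  .set("spark.yarn.keytab", "/etc/security/keytabs/kudu-user.keytab")
// A SparkContext built from this conf lets KuduContext log in with the keytab
// instead of relying on an existing ticket cache.
----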
+ */ + private val ShutdownHookPriority = 100 + + private val clientCache = new mutable.HashMap[String, CacheValue]() + + // Visible for testing. + private[kudu] def clearCacheForTests() = { + clientCache.values.foreach { + case cacheValue => + try { + cacheValue.kuduClient.close() + } catch { + case e: Exception => log.warn("Error while shutting down the test client", e); + } + + // A client may only be closed once, so once we've close this client, + // we mustn't close it again at shutdown time. + ShutdownHookManager.get().removeShutdownHook(cacheValue.shutdownHookHandle) + } + clientCache.clear() + } + + def getAsyncClient( + kuduMaster: String, + saslProtocolName: Option[String], + requireAuthentication: Boolean = false, + encryptionPolicy: EncryptionPolicy = EncryptionPolicy.OPTIONAL): AsyncKuduClient = { + clientCache.synchronized { + if (!clientCache.contains(kuduMaster)) { + val builder = new AsyncKuduClient.AsyncKuduClientBuilder(kuduMaster) + if (saslProtocolName.nonEmpty) { + builder.saslProtocolName(saslProtocolName.get) + } + builder.requireAuthentication(requireAuthentication) + builder.encryptionPolicy(encryptionPolicy) + val asyncClient = builder.build() + val hookHandle = new Runnable { + override def run(): Unit = asyncClient.close() + } + ShutdownHookManager.get().addShutdownHook(hookHandle, ShutdownHookPriority) + val cacheValue = CacheValue(asyncClient, hookHandle) + clientCache.put(kuduMaster, cacheValue) + } + return clientCache(kuduMaster).kuduClient + } + } +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala new file mode 100644 index 0000000000..84b004f222 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala @@ -0,0 +1,172 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import scala.collection.JavaConverters._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.apache.spark.Partition +import org.apache.spark.SparkContext +import org.apache.spark.TaskContext +import org.apache.spark.util.LongAccumulator +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.apache.kudu.client +import org.apache.kudu.client._ +import org.apache.kudu.client.KuduScannerIterator.NextRowsCallback + +/** + * A Resilient Distributed Dataset backed by a Kudu table. + * + * To construct a KuduRDD, use [[KuduContext#kuduRDD]] or a Kudu DataSource. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +@SerialVersionUID(1L) +class KuduRDD private[kudu] ( + val kuduContext: KuduContext, + @transient val table: KuduTable, + @transient val projectedCols: Array[String], + @transient val predicates: Array[client.KuduPredicate], + @transient val options: KuduReadOptions, + @transient val sc: SparkContext) + extends RDD[Row](sc, Nil) { + + // Defined here because the options are transient. + private val keepAlivePeriodMs = options.keepAlivePeriodMs + + // A metric for the rows read from Kudu for this RDD. + // TODO(wdberkeley): Add bytes read if it becomes available from the Java client. + private[kudu] val rowsRead = sc.longAccumulator("kudu.rows_read") + + override protected def getPartitions: Array[Partition] = { + val builder = kuduContext.syncClient + .newScanTokenBuilder(table) + .batchSizeBytes(options.batchSize) + .setProjectedColumnNames(projectedCols.toSeq.asJava) + .setFaultTolerant(options.faultTolerantScanner) + .keepAlivePeriodMs(keepAlivePeriodMs) + .cacheBlocks(true) + + // A scan is partitioned to multiple ones. If scan locality is enabled, + // each will take place at the closet replica from the executor. In this + // case, to ensure the consistency of such scan, we use READ_AT_SNAPSHOT + // read mode without setting a timestamp. + builder.replicaSelection(options.scanLocality) + if (options.scanLocality == ReplicaSelection.CLOSEST_REPLICA || + options.snapshotTimestampMs.isDefined) { + builder.readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT) + } + + options.snapshotTimestampMs.foreach { timestamp => + builder.snapshotTimestampMicros(timestamp * 1000) + } + + options.scanRequestTimeoutMs.foreach { timeout => + builder.scanRequestTimeout(timeout) + } + + options.splitSizeBytes.foreach { size => + builder.setSplitSizeBytes(size) + } + + builder.includeTableMetadata(options.useDriverMetadata) + builder.includeTabletMetadata(options.useDriverMetadata) + + for (predicate <- predicates) { + builder.addPredicate(predicate) + } + + val tokens = builder.build().asScala + tokens.zipWithIndex.map { + case (token, index) => + // Only list the leader replica as the preferred location if + // replica selection policy is leader only, to take advantage + // of scan locality. + val locations = { + if (options.scanLocality == ReplicaSelection.LEADER_ONLY) { + Array(token.getTablet.getLeaderReplica.getRpcHost) + } else { + token.getTablet.getReplicas.asScala.map(_.getRpcHost).toArray + } + } + new KuduPartition(index, token.serialize(), locations) + }.toArray + } + + override def compute(part: Partition, taskContext: TaskContext): Iterator[Row] = { + val client: KuduClient = kuduContext.syncClient + val partition: KuduPartition = part.asInstanceOf[KuduPartition] + val scanner = + KuduScanToken.deserializeIntoScanner(partition.scanToken, client) + // We don't store the RowResult so we can enable the reuseRowResult optimization. + scanner.setReuseRowResult(true) + new RowIterator(scanner, kuduContext, rowsRead) + } + + override def getPreferredLocations(partition: Partition): Seq[String] = { + partition.asInstanceOf[KuduPartition].locations + } +} + +/** + * A Spark SQL [[Partition]] which wraps a [[KuduScanToken]]. + */ +private class KuduPartition( + val index: Int, + val scanToken: Array[Byte], + val locations: Array[String]) + extends Partition {} + +/** + * A Spark SQL [[Row]] iterator which wraps a [[KuduScanner]]. 
+ * @param scanner the wrapped scanner + * @param kuduContext the kudu context + * @param rowsRead an accumulator to track the number of rows read from Kudu + */ +private class RowIterator( + val scanner: KuduScanner, + val kuduContext: KuduContext, + val rowsRead: LongAccumulator) + extends Iterator[Row] { + + private val scannerIterator = scanner.iterator() + private val nextRowsCallback = new NextRowsCallback { + override def call(numRows: Int): Unit = { + if (TaskContext.get().isInterrupted()) { + throw new RuntimeException("Kudu task interrupted") + } + kuduContext.timestampAccumulator.add(kuduContext.syncClient.getLastPropagatedTimestamp) + rowsRead.add(numRows) + } + } + + override def hasNext: Boolean = { + scannerIterator.hasNext(nextRowsCallback) + } + + override def next(): Row = { + val rowResult = scannerIterator.next() + val columnCount = rowResult.getColumnProjection.getColumnCount + val columns = Array.ofDim[Any](columnCount) + for (i <- 0 until columnCount) { + columns(i) = rowResult.getObject(i) + } + Row.fromSeq(columns) + } +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduReadOptions.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduReadOptions.scala new file mode 100644 index 0000000000..405c58964e --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduReadOptions.scala @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability +import org.apache.kudu.client.AsyncKuduClient +import org.apache.kudu.client.ReplicaSelection +import org.apache.kudu.spark.kudu.KuduReadOptions._ + +/** + * KuduReadOptions holds configuration of reads to Kudu tables. + * + * @param batchSize Sets the maximum number of bytes returned by the scanner, on each batch. + * @param scanLocality If true scan locality is enabled, so that the scan will + * take place at the closest replica + * @param faultTolerantScanner scanner type to be used. Fault tolerant if true, + * otherwise, use non fault tolerant one + * @param keepAlivePeriodMs The period at which to send keep-alive requests to the tablet + * server to ensure that scanners do not time out + * @param scanRequestTimeoutMs Maximum time allowed per scan request, in milliseconds + * @param socketReadTimeoutMs This parameter is deprecated and has no effect + * @param splitSizeBytes Sets the target number of bytes per spark task. If set, tablet's + * primary key range will be split to generate uniform task sizes instead of + * the default of 1 task per tablet. 
+ * @param useDriverMetadata If true, sends the table metadata from the driver to the tasks instead + * of relying on calls to the Kudu master for each task to get the current + * table metadata. + * @param snapshotTimestampMs Sets a timestamp in unixtime milliseconds to use for READ_AT_SNAPSHOT + * to allow repeatable reads. If not set, the timestamp is generated + * by the server. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +case class KuduReadOptions( + batchSize: Int = defaultBatchSize, + scanLocality: ReplicaSelection = defaultScanLocality, + faultTolerantScanner: Boolean = defaultFaultTolerantScanner, + keepAlivePeriodMs: Long = defaultKeepAlivePeriodMs, + scanRequestTimeoutMs: Option[Long] = None, + socketReadTimeoutMs: Option[Long] = None, + splitSizeBytes: Option[Long] = None, + useDriverMetadata: Boolean = defaultUseDriverMetadata, + snapshotTimestampMs: Option[Long] = None) + +object KuduReadOptions { + val defaultBatchSize: Int = 1024 * 1024 * 20 // TODO: Understand/doc this setting? + val defaultScanLocality: ReplicaSelection = ReplicaSelection.CLOSEST_REPLICA + val defaultFaultTolerantScanner: Boolean = false + val defaultKeepAlivePeriodMs: Long = AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS + val defaultUseDriverMetadata: Boolean = true +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduWriteOptions.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduWriteOptions.scala new file mode 100644 index 0000000000..01aa3d17da --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduWriteOptions.scala @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability + +import org.apache.kudu.spark.kudu.KuduWriteOptions._ + +/** + * KuduWriteOptions holds configuration of writes to Kudu tables. + * + * @param ignoreDuplicateRowErrors when inserting, ignore any new rows that + * have a primary key conflict with existing rows + * @param ignoreNull update only non-Null columns if set true + * @param repartition if set to true, the data will be repartitioned to match the + * partitioning of the target Kudu table + * @param repartitionSort if set to true, the data will also be sorted while being + * repartitioned. This is only used if repartition is true. + * @param handleSchemaDrift if set to true, when fields with names that are not in + * the target Kudu table are encountered, the Kudu table + * will be altered to include new columns for those fields. 
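+ *
+ * A minimal usage sketch (the DataFrame, table name, and KuduContext below
+ * are placeholders):
+ * {{{
+ *   val writeOptions = KuduWriteOptions(ignoreDuplicateRowErrors = true, repartition = true)
+ *   kuduContext.insertRows(df, "my_table", writeOptions)
+ * }}}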
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +case class KuduWriteOptions( + ignoreDuplicateRowErrors: Boolean = defaultIgnoreDuplicateRowErrors, + ignoreNull: Boolean = defaultIgnoreNull, + repartition: Boolean = defaultRepartition, + repartitionSort: Boolean = defaultRepartitionSort, + handleSchemaDrift: Boolean = defaultHandleSchemaDrift) + +object KuduWriteOptions { + val defaultIgnoreDuplicateRowErrors: Boolean = false + val defaultIgnoreNull: Boolean = false + val defaultRepartition: Boolean = false + val defaultRepartitionSort: Boolean = true + val defaultHandleSchemaDrift: Boolean = false +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/MapAccumulator.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/MapAccumulator.scala new file mode 100644 index 0000000000..c0fea78bac --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/MapAccumulator.scala @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import java.util.Collections +import java.util.function.BiConsumer +import java.util.function.BiFunction + +import org.apache.spark.util.AccumulatorV2 + +/** + * Spark accumulator implementation that takes 2-tuples as input and + * Map[K, V] as output. The accumulator requires a merge function + * to handle updates to existing entries in the map. 
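+ *
+ * For illustration, counting rows written per table by summing values that
+ * share a key (the SparkContext below is a placeholder):
+ * {{{
+ *   val rowsPerTable = new MapAccumulator[String, Long](_ + _)
+ *   sc.register(rowsPerTable, "kudu.rows_per_table")
+ *   rowsPerTable.add(("my_table", 3L))
+ *   rowsPerTable.add(("my_table", 2L))
+ *   // rowsPerTable.value.get("my_table") == 5L
+ * }}}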
+ * + * @param mergeFn a function applied to two values for the same Map key + * @tparam K type of the map key + * @tparam V type of the map value + */ +class MapAccumulator[K, V](mergeFn: (V, V) => V) + extends AccumulatorV2[(K, V), java.util.Map[K, V]] { + import MapAccumulator._ + + private val map = Collections.synchronizedMap(new java.util.HashMap[K, V]()) + private val mergeFunc = new SerializableBiFunction[V, V, V] { + override def apply(t: V, u: V): V = mergeFn(t, u) + } + + override def isZero: Boolean = map.isEmpty + + override def copy(): AccumulatorV2[(K, V), java.util.Map[K, V]] = { + val newAcc = new MapAccumulator[K, V](mergeFn) + map.synchronized { + newAcc.map.putAll(map) + } + newAcc + } + + override def reset(): Unit = map.clear() + + override def add(v: (K, V)): Unit = { + map.merge(v._1, v._2, mergeFunc) + } + + override def merge(other: AccumulatorV2[(K, V), java.util.Map[K, V]]): Unit = { + other match { + case o: MapAccumulator[K, V] => + map.synchronized { + o.map.forEach(new BiConsumer[K, V]() { + override def accept(k: K, v: V): Unit = { + add((k, v)) + } + }) + } + case _ => + throw new UnsupportedOperationException( + s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") + } + } + + override def value: java.util.Map[K, V] = map.synchronized { + java.util.Collections.unmodifiableMap[K, V](new java.util.HashMap[K, V](map)) + } +} + +object MapAccumulator { + abstract class SerializableBiFunction[T, U, R] extends BiFunction[T, U, R] with Serializable +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/OperationType.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/OperationType.scala new file mode 100644 index 0000000000..125ee19828 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/OperationType.scala @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import org.apache.kudu.client.KuduTable +import org.apache.kudu.client.Operation + +/** + * OperationType enumerates the types of Kudu write operations. 
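+ *
+ * Each case object wraps the corresponding factory method on [[KuduTable]],
+ * for example:
+ * {{{
+ *   val op: Operation = Upsert.operation(table) // equivalent to table.newUpsert()
+ * }}}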
+ */ +private[kudu] sealed trait OperationType { + def operation(table: KuduTable): Operation + + def toString(): String +} +private[kudu] case object Insert extends OperationType { + override def operation(table: KuduTable): Operation = table.newInsert() + + override def toString(): String = "insert" +} +private[kudu] case object InsertIgnore extends OperationType { + override def operation(table: KuduTable): Operation = table.newInsertIgnore() + + override def toString(): String = "insert_ignore" +} +private[kudu] case object Update extends OperationType { + override def operation(table: KuduTable): Operation = table.newUpdate() + + override def toString(): String = "update" +} +private[kudu] case object UpdateIgnore extends OperationType { + override def operation(table: KuduTable): Operation = table.newUpdateIgnore() + + override def toString(): String = "update_ignore" +} +private[kudu] case object Upsert extends OperationType { + override def operation(table: KuduTable): Operation = table.newUpsert() + + override def toString(): String = "upsert" +} +private[kudu] case object Delete extends OperationType { + override def operation(table: KuduTable): Operation = table.newDelete() + + override def toString(): String = "delete" +} +private[kudu] case object DeleteIgnore extends OperationType { + override def operation(table: KuduTable): Operation = table.newDeleteIgnore() + + override def toString(): String = "delete_ignore" +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/RowConverter.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/RowConverter.scala new file mode 100644 index 0000000000..f4c52c82ba --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/RowConverter.scala @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.spark.kudu + +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.client.PartialRow +import org.apache.kudu.client.RowResult +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.CatalystTypeConverters +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types.DataTypes +import org.apache.spark.sql.types.DecimalType +import org.apache.spark.sql.types.StructType +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability + +@InterfaceAudience.Private +@InterfaceStability.Unstable +class RowConverter(kuduSchema: Schema, schema: StructType, ignoreNull: Boolean) { + + private val typeConverter = CatalystTypeConverters.createToScalaConverter(schema) + private val indices: Array[(Int, Int)] = schema.fields.zipWithIndex.flatMap { + case (field, sparkIdx) => + // Support Spark schemas that have more columns than the Kudu table by + // ignoring missing Kudu columns. + if (kuduSchema.hasColumn(field.name)) { + Some(sparkIdx -> kuduSchema.getColumnIndex(field.name)) + } else None + } + + /** + * Converts a Spark internalRow to a Spark Row. + */ + def toRow(internalRow: InternalRow): Row = { + typeConverter(internalRow).asInstanceOf[Row] + } + + /** + * Converts a Spark row to a Kudu PartialRow. + */ + def toPartialRow(row: Row): PartialRow = { + val partialRow = kuduSchema.newPartialRow() + for ((sparkIdx, kuduIdx) <- indices) { + if (row.isNullAt(sparkIdx)) { + if (kuduSchema.getColumnByIndex(kuduIdx).isKey) { + val key_name = kuduSchema.getColumnByIndex(kuduIdx).getName + throw new IllegalArgumentException(s"Can't set primary key column '$key_name' to null") + } + if (!ignoreNull) partialRow.setNull(kuduIdx) + } else { + schema.fields(sparkIdx).dataType match { + case DataTypes.StringType => + kuduSchema.getColumnByIndex(kuduIdx).getType match { + case Type.STRING => + partialRow.addString(kuduIdx, row.getString(sparkIdx)) + case Type.VARCHAR => + partialRow.addVarchar(kuduIdx, row.getString(sparkIdx)) + case t => + throw new IllegalArgumentException(s"Invalid Kudu column type $t") + } + case DataTypes.BinaryType => + partialRow.addBinary(kuduIdx, row.getAs[Array[Byte]](sparkIdx)) + case DataTypes.BooleanType => + partialRow.addBoolean(kuduIdx, row.getBoolean(sparkIdx)) + case DataTypes.ByteType => + partialRow.addByte(kuduIdx, row.getByte(sparkIdx)) + case DataTypes.ShortType => + partialRow.addShort(kuduIdx, row.getShort(sparkIdx)) + case DataTypes.IntegerType => + partialRow.addInt(kuduIdx, row.getInt(sparkIdx)) + case DataTypes.LongType => + partialRow.addLong(kuduIdx, row.getLong(sparkIdx)) + case DataTypes.FloatType => + partialRow.addFloat(kuduIdx, row.getFloat(sparkIdx)) + case DataTypes.DoubleType => + partialRow.addDouble(kuduIdx, row.getDouble(sparkIdx)) + case DataTypes.TimestampType => + partialRow.addTimestamp(kuduIdx, row.getTimestamp(sparkIdx)) + case DataTypes.DateType => + partialRow.addDate(kuduIdx, row.getDate(sparkIdx)) + case DecimalType() => + partialRow.addDecimal(kuduIdx, row.getDecimal(sparkIdx)) + case t => + throw new IllegalArgumentException(s"No support for Spark SQL type $t") + } + } + } + partialRow + } + + /** + * Converts a Kudu RowResult to a Spark row. 
+ */ + def toRow(rowResult: RowResult): Row = { + val columnCount = rowResult.getColumnProjection.getColumnCount + val columns = Array.ofDim[Any](columnCount) + for (i <- 0 until columnCount) { + columns(i) = rowResult.getObject(i) + } + new GenericRowWithSchema(columns, schema) + } + + /** + * Converts a Kudu PartialRow to a Spark row. + */ + def toRow(partialRow: PartialRow): Row = { + val columnCount = partialRow.getSchema.getColumnCount + val columns = Array.ofDim[Any](columnCount) + for (i <- 0 until columnCount) { + columns(i) = partialRow.getObject(i) + } + new GenericRowWithSchema(columns, schema) + } +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/SparkUtil.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/SparkUtil.scala new file mode 100644 index 0000000000..d875990f05 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/SparkUtil.scala @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import java.util + +import org.apache.spark.sql.types._ +import org.apache.yetus.audience.InterfaceAudience +import org.apache.yetus.audience.InterfaceStability + +import org.apache.kudu.ColumnTypeAttributes.ColumnTypeAttributesBuilder +import org.apache.kudu.ColumnSchema +import org.apache.kudu.ColumnTypeAttributes +import org.apache.kudu.Schema +import org.apache.kudu.Type + +import scala.jdk.CollectionConverters._ + +@InterfaceAudience.Private +@InterfaceStability.Unstable +object SparkUtil { + + /** + * Converts a Kudu [[Type]] to a Spark SQL [[DataType]]. + * + * @param t the Kudu type + * @param a the Kudu type attributes + * @return the corresponding Spark SQL type + */ + def kuduTypeToSparkType(t: Type, a: ColumnTypeAttributes): DataType = + t match { + case Type.BOOL => BooleanType + case Type.INT8 => ByteType + case Type.INT16 => ShortType + case Type.INT32 => IntegerType + case Type.INT64 => LongType + case Type.UNIXTIME_MICROS => TimestampType + case Type.DATE => DateType + case Type.FLOAT => FloatType + case Type.DOUBLE => DoubleType + case Type.VARCHAR => StringType + case Type.STRING => StringType + case Type.BINARY => BinaryType + case Type.DECIMAL => DecimalType(a.getPrecision, a.getScale) + case _ => + throw new IllegalArgumentException(s"No support for Kudu type $t") + } + + /** + * Converts a Spark SQL [[DataType]] to a Kudu [[Type]]. 
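+ *
+ * For illustration:
+ * {{{
+ *   sparkTypeToKuduType(DataTypes.StringType)    // Type.STRING
+ *   sparkTypeToKuduType(DataTypes.TimestampType) // Type.UNIXTIME_MICROS
+ *   sparkTypeToKuduType(DecimalType(18, 4))      // Type.DECIMAL
+ *   sparkTypeToKuduType(DataTypes.LongType)      // Type.INT64
+ * }}}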
+ * + * @param dt the Spark SQL type + * @return + */ + def sparkTypeToKuduType(dt: DataType): Type = dt match { + case DataTypes.BinaryType => Type.BINARY + case DataTypes.BooleanType => Type.BOOL + case DataTypes.StringType => Type.STRING + case DataTypes.TimestampType => Type.UNIXTIME_MICROS + case DataTypes.DateType => Type.DATE + case DataTypes.ByteType => Type.INT8 + case DataTypes.ShortType => Type.INT16 + case DataTypes.IntegerType => Type.INT32 + case DataTypes.LongType => Type.INT64 + case DataTypes.FloatType => Type.FLOAT + case DataTypes.DoubleType => Type.DOUBLE + case DecimalType() => Type.DECIMAL + case _ => + throw new IllegalArgumentException(s"No support for Spark SQL type $dt") + } + + /** + * Generates a SparkSQL schema from a Kudu schema. + * + * @param kuduSchema the Kudu schema + * @param fields an optional column projection + * @return the SparkSQL schema + */ + def sparkSchema(kuduSchema: Schema, fields: Option[Seq[String]] = None): StructType = { + val kuduColumns = fields match { + case Some(fieldNames) => fieldNames.map(kuduSchema.getColumn) + case None => kuduSchema.getColumns.asScala + } + val sparkColumns = kuduColumns.map { col => + val sparkType = kuduTypeToSparkType(col.getType, col.getTypeAttributes) + StructField(col.getName, sparkType, col.isNullable) + } + StructType(sparkColumns.asJava) + } + + /** + * Generates a Kudu schema from a SparkSQL schema. + * + * @param sparkSchema the SparkSQL schema + * @param keys the ordered names of key columns + * @return the Kudu schema + */ + def kuduSchema(sparkSchema: StructType, keys: Seq[String]): Schema = { + val kuduCols = new util.ArrayList[ColumnSchema]() + // add the key columns first, in the order specified + for (key <- keys) { + val field = sparkSchema.fields(sparkSchema.fieldIndex(key)) + val col = createColumnSchema(field, isKey = true) + kuduCols.add(col) + } + // now add the non-key columns + for (field <- sparkSchema.fields.filter(field => !keys.contains(field.name))) { + val col = createColumnSchema(field, isKey = false) + kuduCols.add(col) + } + new Schema(kuduCols) + } + + /** + * Generates a Kudu column schema from a SparkSQL field. + * + * @param field the SparkSQL field + * @param isKey true if the column is a key + * @return the Kudu column schema + */ + private def createColumnSchema(field: StructField, isKey: Boolean): ColumnSchema = { + val kt = sparkTypeToKuduType(field.dataType) + val col = new ColumnSchema.ColumnSchemaBuilder(field.name, kt) + .key(isKey) + .nullable(field.nullable) + // Add ColumnTypeAttributesBuilder to DECIMAL columns + if (kt == Type.DECIMAL) { + val dt = field.dataType.asInstanceOf[DecimalType] + col.typeAttributes( + new ColumnTypeAttributesBuilder() + .precision(dt.precision) + .scale(dt.scale) + .build() + ) + } + col.build() + } + +} diff --git a/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/package.scala b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/package.scala new file mode 100644 index 0000000000..4f9e7b3df9 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/package.scala @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.DataFrameReader +import org.apache.spark.sql.DataFrameWriter + +package object kudu { + + /** + * Adds a method, `kudu`, to DataFrameReader that allows you to read Kudu tables using + * the DataFrameReader. + */ + implicit class KuduDataFrameReader(reader: DataFrameReader) { + + @deprecated("Use `.format(\"kudu\").load` instead", "1.9.0") + def kudu: DataFrame = reader.format("org.apache.kudu.spark.kudu").load + } + + /** + * Adds a method, `kudu`, to DataFrameWriter that allows writes to Kudu using + * the DataFileWriter + */ + implicit class KuduDataFrameWriter[T](writer: DataFrameWriter[T]) { + + @deprecated("Use `.format(\"kudu\").save` instead", "1.9.0") + def kudu() = writer.format("org.apache.kudu.spark.kudu").save + } +} diff --git a/java-scala-spark4/kudu-spark/src/test/resources/log4j2.properties b/java-scala-spark4/kudu-spark/src/test/resources/log4j2.properties new file mode 100644 index 0000000000..22762a1560 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/test/resources/log4j2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT + +logger.kudu.name = org.apache.kudu +logger.kudu.level = debug diff --git a/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/DefaultSourceTest.scala b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/DefaultSourceTest.scala new file mode 100644 index 0000000000..881d6a1f3b --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/DefaultSourceTest.scala @@ -0,0 +1,1041 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import java.nio.charset.StandardCharsets +import java.util +import scala.collection.JavaConverters._ +import scala.collection.immutable.IndexedSeq +import org.apache.spark.SparkException +import org.apache.spark.sql.Row +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.DataTypes +import org.apache.spark.sql.types.StructField +import org.apache.spark.sql.types.StructType +import org.junit.Assert._ +import org.apache.kudu.client.CreateTableOptions +import org.apache.kudu.test.KuduTestHarness +import org.apache.kudu.test.RandomUtils +import org.apache.kudu.spark.kudu.SparkListenerUtil.withJobTaskCounter +import org.apache.kudu.test.KuduTestHarness.EnableKerberos +import org.apache.kudu.test.KuduTestHarness.MasterServerConfig +import org.apache.kudu.test.KuduTestHarness.TabletServerConfig +import org.junit.Before +import org.junit.Test +import org.scalatest.matchers.should.Matchers + +import scala.util.Random + +class DefaultSourceTest extends KuduTestSuite with Matchers { + val rowCount = 10 + var sqlContext: SQLContext = _ + var rows: IndexedSeq[(Int, Int, String, Long)] = _ + var kuduOptions: Map[String, String] = _ + + @Before + def setUp(): Unit = { + rows = insertRows(table, rowCount) + + sqlContext = ss.sqlContext + + kuduOptions = + Map("kudu.table" -> tableName, "kudu.master" -> harness.getMasterAddressesAsString) + + sqlContext.read + .options(kuduOptions) + .format("kudu") + .load() + .createOrReplaceTempView(tableName) + } + + /** + * A simple test to delete rows from an empty table. + * First delete ignore rows from the empty table. + * Next, insert data to the kudu table and delete all of them afterwards. + * Finally, delete again from the empty table through deleteIngoreRows. + */ + @Test + def testDeleteRowsFromEmptyTable(): Unit = { + val origDf = sqlContext.read.options(kuduOptions).format("kudu").load + val tableName = "testEmptyTable" + if (kuduContext.tableExists(tableName)) { + kuduContext.deleteTable(tableName) + } + kuduContext.createTable( + tableName, + origDf.schema, + Seq("key"), + new CreateTableOptions() + .addHashPartitions(List("key").asJava, harness.getTabletServers.size() * 3)) + val df = sqlContext.read.options(kuduOptions).format("kudu").load + // delete rows from the empty table. + kuduContext.deleteIgnoreRows(df, tableName) + // insert rows. + val newKuduTableOptions = + Map("kudu.table" -> tableName, "kudu.master" -> harness.getMasterAddressesAsString) + val insertsBefore = kuduContext.numInserts.value.get(tableName) + kuduContext.insertRows(df, tableName) + assertEquals(insertsBefore + df.count(), kuduContext.numInserts.value.get(tableName)) + // delete all of the newly inserted rows. 
+ kuduContext.deleteRows(df, tableName) + val newDf = sqlContext.read.options(newKuduTableOptions).format("kudu").load() + assertEquals(newDf.collectAsList().size(), 0) + // delete all of rows again, which is no hurt. + kuduContext.deleteIgnoreRows(df, tableName) + } + + /** + * A simple test with two threads to delete the data from the same table. + * After applying deleteIgnoreRows, there should be no errors even though + * half of the delete operations will be applied on non-existing rows. + */ + @Test + def testDuplicateDelete(): Unit = { + val totalRows = 10000 + val origDf = sqlContext.read.options(kuduOptions).format("kudu").load + insertRows(table, totalRows, rowCount) + val tableName = "testDuplicateDelete" + if (kuduContext.tableExists(tableName)) { + kuduContext.deleteTable(tableName) + } + kuduContext.createTable( + tableName, + origDf.schema, + Seq("key"), + new CreateTableOptions() + .addHashPartitions(List("key").asJava, harness.getTabletServers.size() * 3) + .setNumReplicas(3)) + val newKuduTableOptions = + Map("kudu.table" -> tableName, "kudu.master" -> harness.getMasterAddressesAsString) + kuduContext.insertRows(origDf, tableName) + val df = sqlContext.read.options(newKuduTableOptions).format("kudu").load() + assertEquals(df.collectAsList().size(), totalRows + rowCount) + val deleteThread1 = new Thread { + override def run: Unit = { + kuduContext.deleteIgnoreRows(df, tableName) + } + } + val deleteThread2 = new Thread { + override def run: Unit = { + kuduContext.deleteIgnoreRows(df, tableName) + } + } + deleteThread1.start() + deleteThread2.start() + deleteThread1.join(3000) + deleteThread2.join(3000) + val newDf = sqlContext.read.options(newKuduTableOptions).format("kudu").load() + assertEquals(newDf.collectAsList().size(), 0) + } + + /** + * A simple test to verify the legacy package reader/writer + * syntax still works. This should be removed when the + * deprecated `kudu` methods are removed. + */ + @Test @deprecated("Marked as deprecated to suppress warning", "") + def testPackageReaderAndWriter(): Unit = { + val df = sqlContext.read.options(kuduOptions).kudu + val baseDF = df.limit(1) // filter down to just the first row + // change the c2 string to abc and update + val updateDF = baseDF.withColumn("c2_s", lit("abc")) + updateDF.write.options(kuduOptions).mode("append").kudu + + val newDf = sqlContext.read.options(kuduOptions).kudu + assertFalse(newDf.collect().isEmpty) + } + + @Test + def testTableCreation() { + val tableName = "testcreatetable" + if (kuduContext.tableExists(tableName)) { + kuduContext.deleteTable(tableName) + } + val df = sqlContext.read.options(kuduOptions).format("kudu").load + kuduContext.createTable( + tableName, + df.schema, + Seq("key"), + new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1)) + val insertsBefore = kuduContext.numInserts.value.get(tableName) + kuduContext.insertRows(df, tableName) + assertEquals(insertsBefore + df.count(), kuduContext.numInserts.value.get(tableName)) + + // Now use new options to refer to the new table name. 
+ val newOptions: Map[String, String] = + Map("kudu.table" -> tableName, "kudu.master" -> harness.getMasterAddressesAsString) + val checkDf = sqlContext.read.options(newOptions).format("kudu").load + + assert(checkDf.schema === df.schema) + assertTrue(kuduContext.tableExists(tableName)) + assert(checkDf.count == 10) + + kuduContext.deleteTable(tableName) + assertFalse(kuduContext.tableExists(tableName)) + } + + @Test + def testTableCreationWithPartitioning() { + val tableName = "testcreatepartitionedtable" + if (kuduContext.tableExists(tableName)) { + kuduContext.deleteTable(tableName) + } + val df = sqlContext.read.options(kuduOptions).format("kudu").load + + val kuduSchema = kuduContext.createSchema(df.schema, Seq("key")) + val lower = kuduSchema.newPartialRow() + lower.addInt("key", 0) + val upper = kuduSchema.newPartialRow() + upper.addInt("key", Integer.MAX_VALUE) + + kuduContext.createTable( + tableName, + kuduSchema, + new CreateTableOptions() + .addHashPartitions(List("key").asJava, 2) + .setRangePartitionColumns(List("key").asJava) + .addRangePartition(lower, upper) + .setNumReplicas(1) + ) + val insertsBefore = kuduContext.numInserts.value.get(tableName) + kuduContext.insertRows(df, tableName) + assertEquals(insertsBefore + df.count(), kuduContext.numInserts.value.get(tableName)) + + // now use new options to refer to the new table name + val newOptions: Map[String, String] = + Map("kudu.table" -> tableName, "kudu.master" -> harness.getMasterAddressesAsString) + val checkDf = sqlContext.read.options(newOptions).format("kudu").load + + assert(checkDf.schema === df.schema) + assertTrue(kuduContext.tableExists(tableName)) + assert(checkDf.count == 10) + + kuduContext.deleteTable(tableName) + assertFalse(kuduContext.tableExists(tableName)) + } + + @Test + def testInsertion() { + val insertsBefore = kuduContext.numInserts.value.get(tableName) + println(s"insertsBefore: $insertsBefore") + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val changedDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("abc")) + kuduContext.insertRows(changedDF, tableName) + assertEquals(insertsBefore + changedDF.count(), kuduContext.numInserts.value.get(tableName)) + + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collected = newDF.filter("key = 100").collect() + assertEquals("abc", collected(0).getAs[String]("c2_s")) + + deleteRow(100) + } + + @Test + def testInsertionMultiple() { + val insertsBefore = kuduContext.numInserts.value.get(tableName) + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val changedDF = df + .limit(2) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("abc")) + kuduContext.insertRows(changedDF, tableName) + assertEquals(insertsBefore + changedDF.count(), kuduContext.numInserts.value.get(tableName)) + + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collected = newDF.filter("key = 100").collect() + assertEquals("abc", collected(0).getAs[String]("c2_s")) + + val collectedTwo = newDF.filter("key = 101").collect() + assertEquals("abc", collectedTwo(0).getAs[String]("c2_s")) + + deleteRow(100) + deleteRow(101) + } + + @Test + def testInsertionIgnoreRows() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val baseDF = df.limit(1) // Filter down to just the first row. 
+ + // change the c2 string to abc and insert + val updateDF = baseDF.withColumn("c2_s", lit("abc")) + val kuduWriteOptions = KuduWriteOptions(ignoreDuplicateRowErrors = true) + kuduContext.insertRows(updateDF, tableName, kuduWriteOptions) + + // Change the key and insert. + val insertDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("def")) + kuduContext.insertRows(insertDF, tableName, kuduWriteOptions) + + // Read the data back. + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collectedUpdate = newDF.filter("key = 0").collect() + assertEquals("0", collectedUpdate(0).getAs[String]("c2_s")) + val collectedInsert = newDF.filter("key = 100").collect() + assertEquals("def", collectedInsert(0).getAs[String]("c2_s")) + + // Restore the original state of the table. + deleteRow(100) + } + + @Test + def testInsertIgnoreRowsUsingDefaultSource() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val baseDF = df.limit(1) // filter down to just the first row + + // change the c2 string to abc and insert + val updateDF = baseDF.withColumn("c2_s", lit("abc")) + val newOptions: Map[String, String] = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.operation" -> "insert", + "kudu.ignoreDuplicateRowErrors" -> "true") + updateDF.write.options(newOptions).mode("append").format("kudu").save + + // change the key and insert + val insertDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("def")) + insertDF.write.options(newOptions).mode("append").format("kudu").save + + // read the data back + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collectedUpdate = newDF.filter("key = 0").collect() + assertEquals("0", collectedUpdate(0).getAs[String]("c2_s")) + val collectedInsert = newDF.filter("key = 100").collect() + assertEquals("def", collectedInsert(0).getAs[String]("c2_s")) + + // restore the original state of the table + deleteRow(100) + } + + @Test + def testInsertIgnoreRowsWriteOption() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val baseDF = df.limit(1) // filter down to just the first row + + // change the c2 string to abc and insert + val updateDF = baseDF.withColumn("c2_s", lit("abc")) + val newOptions: Map[String, String] = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.operation" -> "insert_ignore") + updateDF.write.options(newOptions).mode("append").format("kudu").save + + // change the key and insert + val insertDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("def")) + insertDF.write.options(newOptions).mode("append").format("kudu").save + + // read the data back + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collectedUpdate = newDF.filter("key = 0").collect() + assertEquals("0", collectedUpdate(0).getAs[String]("c2_s")) + val collectedInsert = newDF.filter("key = 100").collect() + assertEquals("def", collectedInsert(0).getAs[String]("c2_s")) + + // restore the original state of the table + deleteRow(100) + } + + /** + * Identical to the above test, but exercising the old session based insert ignore operations, + * ensuring we functionally support the same semantics. + * Also uses "insert-ignore" instead of "insert_ignore". 
+ */ + @Test + @KuduTestHarness.MasterServerConfig(flags = Array("--master_support_ignore_operations=false")) + def testLegacyInsertIgnoreRowsWriteOption() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val baseDF = df.limit(1) // filter down to just the first row + + // change the c2 string to abc and insert + val updateDF = baseDF.withColumn("c2_s", lit("abc")) + val newOptions: Map[String, String] = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.operation" -> "insert-ignore") + updateDF.write.options(newOptions).mode("append").format("kudu").save + + // change the key and insert + val insertDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("def")) + insertDF.write.options(newOptions).mode("append").format("kudu").save + + // read the data back + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collectedUpdate = newDF.filter("key = 0").collect() + assertEquals("0", collectedUpdate(0).getAs[String]("c2_s")) + val collectedInsert = newDF.filter("key = 100").collect() + assertEquals("def", collectedInsert(0).getAs[String]("c2_s")) + + // restore the original state of the table + deleteRow(100) + } + + @Test @deprecated("Marked as deprecated to suppress warning", "") + def testInsertIgnoreRowsMethod() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val baseDF = df.limit(1) // filter down to just the first row + + // change the c2 string to abc and insert + val updateDF = baseDF.withColumn("c2_s", lit("abc")) + kuduContext.insertIgnoreRows(updateDF, tableName) + + // change the key and insert + val insertDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("def")) + kuduContext.insertIgnoreRows(insertDF, tableName) + + // read the data back + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collectedUpdate = newDF.filter("key = 0").collect() + assertEquals("0", collectedUpdate(0).getAs[String]("c2_s")) + val collectedInsert = newDF.filter("key = 100").collect() + assertEquals("def", collectedInsert(0).getAs[String]("c2_s")) + + // restore the original state of the table + deleteRow(100) + } + + @Test + def testUpsertRows() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val baseDF = df.limit(1) // Filter down to just the first row. + + // Change the c2 string to abc and update. + val upsertDF = baseDF.withColumn("c2_s", lit("abc")) + kuduContext.upsertRows(upsertDF, tableName) + + // Change the key and insert. + val upsertsBefore = kuduContext.numUpserts.value.get(tableName) + val insertDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("def")) + kuduContext.upsertRows(insertDF, tableName) + assertEquals(upsertsBefore + insertDF.count(), kuduContext.numUpserts.value.get(tableName)) + + // Read the data back. + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collectedUpdate = newDF.filter("key = 0").collect() + assertEquals("abc", collectedUpdate(0).getAs[String]("c2_s")) + val collectedInsert = newDF.filter("key = 100").collect() + assertEquals("def", collectedInsert(0).getAs[String]("c2_s")) + + // Restore the original state of the table, and test the numUpdates metric. 
+ val updatesBefore = kuduContext.numUpdates.value.get(tableName) + val updateDF = baseDF.filter("key = 0").withColumn("c2_s", lit("0")) + val updatesApplied = updateDF.count() + kuduContext.updateRows(updateDF, tableName) + assertEquals(updatesBefore + updatesApplied, kuduContext.numUpdates.value.get(tableName)) + deleteRow(100) + } + + @Test + def testMultipleTableOperationCounts() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + + val tableUpsertsBefore = kuduContext.numUpserts.value.get(tableName) + val simpleTableUpsertsBefore = kuduContext.numUpserts.value.get(simpleTableName) + + // Change the key and insert. + val insertDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("def")) + kuduContext.upsertRows(insertDF, tableName) + + // insert new row to simple table + val insertSimpleDF = sqlContext.createDataFrame(Seq((0, "foo"))).toDF("key", "val") + kuduContext.upsertRows(insertSimpleDF, simpleTableName) + + assertEquals(tableUpsertsBefore + insertDF.count(), kuduContext.numUpserts.value.get(tableName)) + assertEquals( + simpleTableUpsertsBefore + insertSimpleDF.count(), + kuduContext.numUpserts.value.get(simpleTableName)) + + // Restore the original state of the tables, and test the numDeletes metric. + val deletesBefore = kuduContext.numDeletes.value.get(tableName) + val simpleDeletesBefore = kuduContext.numDeletes.value.get(simpleTableName) + kuduContext.deleteRows(insertDF, tableName) + kuduContext.deleteRows(insertSimpleDF, simpleTableName) + assertEquals(deletesBefore + insertDF.count(), kuduContext.numDeletes.value.get(tableName)) + assertEquals( + simpleDeletesBefore + insertSimpleDF.count(), + kuduContext.numDeletes.value.get(simpleTableName)) + } + + @Test + def testWriteWithSink() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val baseDF = df.limit(1) // Filter down to just the first row. + + // Change the c2 string to abc and upsert. + val upsertDF = baseDF.withColumn("c2_s", lit("abc")) + upsertDF.write + .format("kudu") + .option("kudu.master", harness.getMasterAddressesAsString) + .option("kudu.table", tableName) + // Default kudu.operation is upsert. + .mode(SaveMode.Append) + .save() + + // Change the key and insert. + val insertDF = df + .limit(1) + .withColumn("key", df("key").plus(100)) + .withColumn("c2_s", lit("def")) + insertDF.write + .format("kudu") + .option("kudu.master", harness.getMasterAddressesAsString) + .option("kudu.table", tableName) + .option("kudu.operation", "insert") + .mode(SaveMode.Append) + .save() + + // Read the data back. 
+ val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collectedUpdate = newDF.filter("key = 0").collect() + assertEquals("abc", collectedUpdate(0).getAs[String]("c2_s")) + val collectedInsert = newDF.filter("key = 100").collect() + assertEquals("def", collectedInsert(0).getAs[String]("c2_s")) + } + + @Test + def testUpsertRowsIgnoreNulls() { + val nonNullDF = + sqlContext.createDataFrame(Seq((0, "foo"))).toDF("key", "val") + kuduContext.insertRows(nonNullDF, simpleTableName) + + val dataDF = sqlContext.read + .options( + Map("kudu.master" -> harness.getMasterAddressesAsString, "kudu.table" -> simpleTableName)) + .format("kudu") + .load + + val nullDF = sqlContext + .createDataFrame(Seq((0, null.asInstanceOf[String]))) + .toDF("key", "val") + val ignoreNullOptions = KuduWriteOptions(ignoreNull = true) + kuduContext.upsertRows(nullDF, simpleTableName, ignoreNullOptions) + assert(dataDF.collect.toList === nonNullDF.collect.toList) + + val respectNullOptions = KuduWriteOptions(ignoreNull = false) + kuduContext.updateRows(nonNullDF, simpleTableName) + kuduContext.upsertRows(nullDF, simpleTableName, respectNullOptions) + assert(dataDF.collect.toList === nullDF.collect.toList) + + kuduContext.updateRows(nonNullDF, simpleTableName) + kuduContext.upsertRows(nullDF, simpleTableName) + assert(dataDF.collect.toList === nullDF.collect.toList) + + val deleteDF = dataDF.filter("key = 0").select("key") + kuduContext.deleteRows(deleteDF, simpleTableName) + } + + @Test + def testUpsertRowsIgnoreNullsUsingDefaultSource() { + val nonNullDF = + sqlContext.createDataFrame(Seq((0, "foo"))).toDF("key", "val") + kuduContext.insertRows(nonNullDF, simpleTableName) + + val dataDF = sqlContext.read + .options( + Map("kudu.master" -> harness.getMasterAddressesAsString, "kudu.table" -> simpleTableName)) + .format("kudu") + .load + + val nullDF = sqlContext + .createDataFrame(Seq((0, null.asInstanceOf[String]))) + .toDF("key", "val") + val options_0: Map[String, String] = Map( + "kudu.table" -> simpleTableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.ignoreNull" -> "true") + nullDF.write.options(options_0).mode("append").format("kudu").save + assert(dataDF.collect.toList === nonNullDF.collect.toList) + + kuduContext.updateRows(nonNullDF, simpleTableName) + val options_1: Map[String, String] = + Map("kudu.table" -> simpleTableName, "kudu.master" -> harness.getMasterAddressesAsString) + nullDF.write.options(options_1).mode("append").format("kudu").save + assert(dataDF.collect.toList === nullDF.collect.toList) + + val deleteDF = dataDF.filter("key = 0").select("key") + kuduContext.deleteRows(deleteDF, simpleTableName) + } + + @Test + def testRepartition(): Unit = { + runRepartitionTest(false) + } + + @Test + def testRepartitionAndSort(): Unit = { + runRepartitionTest(true) + } + + def runRepartitionTest(repartitionSort: Boolean): Unit = { + // Create a simple table with 2 range partitions split on the value 100. 
+ val tableName = "testRepartition" + val splitValue = 100 + val split = simpleSchema.newPartialRow() + split.addInt("key", splitValue) + val options = new CreateTableOptions() + options.setRangePartitionColumns(List("key").asJava) + options.addSplitRow(split) + val table = kuduClient.createTable(tableName, simpleSchema, options) + + val random = Random.javaRandomToRandom(RandomUtils.getRandom) + val data = random.shuffle( + Seq( + Row.fromSeq(Seq(0, "0")), + Row.fromSeq(Seq(25, "25")), + Row.fromSeq(Seq(50, "50")), + Row.fromSeq(Seq(75, "75")), + Row.fromSeq(Seq(99, "99")), + Row.fromSeq(Seq(100, "100")), + Row.fromSeq(Seq(101, "101")), + Row.fromSeq(Seq(125, "125")), + Row.fromSeq(Seq(150, "150")), + Row.fromSeq(Seq(175, "175")), + Row.fromSeq(Seq(199, "199")) + )) + val dataRDD = ss.sparkContext.parallelize(data, numSlices = 2) + val schema = SparkUtil.sparkSchema(table.getSchema) + val dataDF = ss.sqlContext.createDataFrame(dataRDD, schema) + + // Capture the rows so we can validate the insert order. + kuduContext.captureRows = true + + // Count the number of tasks that end. + val actualNumTasks = withJobTaskCounter(ss.sparkContext) { () => + kuduContext.insertRows( + dataDF, + tableName, + new KuduWriteOptions(repartition = true, repartitionSort = repartitionSort)) + } + + // 2 tasks from the parallelize call, and 2 from the repartitioning. + assertEquals(4, actualNumTasks) + val rows = kuduContext.rowsAccumulator.value.asScala + assertEquals(data.size, rows.size) + assertEquals(data.map(_.getInt(0)).sorted, rows.map(_.getInt(0)).sorted) + + // If repartitionSort is true, verify the rows were sorted while repartitioning. + if (repartitionSort) { + def isSorted(rows: Seq[Int]): Boolean = { + rows.sliding(2).forall(p => (p.size == 1) || p.head < p.tail.head) + } + val (bottomRows, topRows) = rows.map(_.getInt(0)).partition(_ < splitValue) + assertTrue(isSorted(bottomRows.toSeq)) + assertTrue(isSorted(topRows.toSeq)) + } + } + + @Test + def testDeleteRows() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val deleteDF = df.filter("key = 0").select("key") + val deletesBefore = kuduContext.numDeletes.value.get(tableName) + val deletesApplied = deleteDF.count() + kuduContext.deleteRows(deleteDF, tableName) + assertEquals(deletesBefore + deletesApplied, kuduContext.numDeletes.value.get(tableName)) + + // Read the data back. + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collectedDelete = newDF.filter("key = 0").collect() + assertEquals(0, collectedDelete.length) + + // Restore the original state of the table. 
+ val insertDF = df.limit(1).filter("key = 0") + kuduContext.insertRows(insertDF, tableName) + } + + @Test + def testOutOfOrderSelection() { + val df = + sqlContext.read.options(kuduOptions).format("kudu").load.select("c2_s", "c1_i", "key") + val collected = df.collect() + assert(collected(0).getString(0).equals("0")) + } + + @Test + def testWriteUsingDefaultSource() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + + val newTable = "testwritedatasourcetable" + kuduContext.createTable( + newTable, + df.schema, + Seq("key"), + new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1)) + + val newOptions: Map[String, String] = + Map("kudu.table" -> newTable, "kudu.master" -> harness.getMasterAddressesAsString) + df.write.options(newOptions).mode("append").format("kudu").save + + val checkDf = sqlContext.read.options(newOptions).format("kudu").load + assert(checkDf.schema === df.schema) + assertTrue(kuduContext.tableExists(newTable)) + assert(checkDf.count == 10) + } + + @Test + def testSchemaDrift() { + val nonNullDF = + sqlContext.createDataFrame(Seq((0, "foo"))).toDF("key", "val") + kuduContext.insertRows(nonNullDF, simpleTableName) + + val tableOptions = Map( + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.table" -> simpleTableName + ) + val df = sqlContext.read.options(tableOptions).format("kudu").load + assertEquals(2, df.schema.fields.length) + + // Add a column not in the table schema by duplicating the val column. + val newDf = df.withColumn("val2", col("val")) + + // Insert with handleSchemaDrift = false. Note that a new column was not created. + kuduContext.upsertRows(newDf, simpleTableName, KuduWriteOptions(handleSchemaDrift = false)) + assertEquals(2, harness.getClient.openTable(simpleTableName).getSchema.getColumns.size()) + + // Insert with handleSchemaDrift = true. Note that a new column was created. + kuduContext.upsertRows(newDf, simpleTableName, KuduWriteOptions(handleSchemaDrift = true)) + assertEquals(3, harness.getClient.openTable(simpleTableName).getSchema.getColumns.size()) + + val afterDf = sqlContext.read.options(tableOptions).format("kudu").load + assertEquals(3, afterDf.schema.fields.length) + assertEquals("val2", afterDf.schema.fieldNames.last) + assertTrue(afterDf.collect().forall(r => r.getString(1) == r.getString(2))) + } + + @Test + def testInsertWrongType() { + val nonNullDF = + sqlContext.createDataFrame(Seq((0, "foo"))).toDF("key", "val") + kuduContext.insertRows(nonNullDF, simpleTableName) + + val tableOptions = Map( + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.table" -> simpleTableName + ) + val df = sqlContext.read.options(tableOptions).format("kudu").load + // Convert the val column to a bytes instead of string. 
+ val toBytes = udf[Array[Byte], String](_.getBytes(StandardCharsets.UTF_8)) + val newDf = df + .withColumn("valTmp", toBytes(col("val"))) + .drop("val") + .withColumnRenamed("valTmp", "val") + + try { + kuduContext.insertRows(newDf, simpleTableName, KuduWriteOptions()) + } catch { + case e: SparkException => + assertTrue(e.getMessage.contains("val isn't [Type: binary], it's string")) + } + } + + @Test + def testCreateRelationWithSchema() { + // user-supplied schema that is compatible with actual schema, but with the key at the end + val userSchema: StructType = StructType( + List( + StructField("c4_long", DataTypes.LongType), + StructField("key", DataTypes.IntegerType) + )) + + val dfDefaultSchema = sqlContext.read.options(kuduOptions).format("kudu").load + assertEquals(16, dfDefaultSchema.schema.fields.length) + + val dfWithUserSchema = + sqlContext.read.options(kuduOptions).schema(userSchema).format("kudu").load + assertEquals(2, dfWithUserSchema.schema.fields.length) + + dfWithUserSchema.limit(10).collect() + assertTrue(dfWithUserSchema.columns.equals(Array("c4_long", "key"))) + } + + @Test + def testCreateRelationWithInvalidSchema() { + // user-supplied schema that is NOT compatible with actual schema + val userSchema: StructType = StructType( + List( + StructField("foo", DataTypes.LongType), + StructField("bar", DataTypes.IntegerType) + )) + + intercept[IllegalArgumentException] { + sqlContext.read.options(kuduOptions).schema(userSchema).format("kudu").load + }.getMessage should include("Unknown column: foo") + } + + // Verify that the propagated timestamp is properly updated inside + // the same client. + @Test + def testTimestampPropagation() { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val insertDF = df + .limit(1) + .withColumn( + "key", + df("key") + .plus(100)) + .withColumn("c2_s", lit("abc")) + + // Initiate a write via KuduContext, and verify that the client should + // have propagated timestamp. + kuduContext.insertRows(insertDF, tableName) + assert(kuduContext.syncClient.getLastPropagatedTimestamp > 0) + var prevTimestamp = kuduContext.syncClient.getLastPropagatedTimestamp + + // Initiate a read via DataFrame, and verify that the client should + // move the propagated timestamp further. + val newDF = sqlContext.read.options(kuduOptions).format("kudu").load + val collected = newDF.filter("key = 100").collect() + assertEquals("abc", collected(0).getAs[String]("c2_s")) + assert(kuduContext.syncClient.getLastPropagatedTimestamp > prevTimestamp) + prevTimestamp = kuduContext.syncClient.getLastPropagatedTimestamp + + // Initiate a read via KuduContext, and verify that the client should + // move the propagated timestamp further. + val rdd = kuduContext.kuduRDD(ss.sparkContext, tableName, List("key")) + assert(rdd.collect.length == 11) + assert(kuduContext.syncClient.getLastPropagatedTimestamp > prevTimestamp) + prevTimestamp = kuduContext.syncClient.getLastPropagatedTimestamp + + // Initiate another write via KuduContext, and verify that the client should + // move the propagated timestamp further. 
+ val updateDF = df + .limit(1) + .withColumn( + "key", + df("key") + .plus(100)) + .withColumn("c2_s", lit("def")) + val kuduWriteOptions = KuduWriteOptions(ignoreDuplicateRowErrors = true) + kuduContext.insertRows(updateDF, tableName, kuduWriteOptions) + assert(kuduContext.syncClient.getLastPropagatedTimestamp > prevTimestamp) + } + + /** + * Verify that the kudu.scanRequestTimeoutMs parameter is parsed by the + * DefaultSource and makes it into the KuduRelation as a configuration + * parameter. + */ + @Test + def testScanRequestTimeoutPropagation() { + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.scanRequestTimeoutMs" -> "66666") + val dataFrame = sqlContext.read.options(kuduOptions).format("kudu").load + val kuduRelation = kuduRelationFromDataFrame(dataFrame) + assert(kuduRelation.readOptions.scanRequestTimeoutMs.contains(66666)) + } + + @Test + def testSnapshotTimestampMsPropagation() { + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.snapshotTimestampMs" -> "86400000000") + val dataFrameSnapshotTimestamp = sqlContext.read.options(kuduOptions).format("kudu").load + val kuduRelationSnapshotTimestamp = kuduRelationFromDataFrame(dataFrameSnapshotTimestamp) + + kuduOptions = + Map("kudu.table" -> tableName, "kudu.master" -> harness.getMasterAddressesAsString) + val dataFrameNoneSnapshotTimestamp = sqlContext.read.options(kuduOptions).format("kudu").load + val kuduRelationNoneSnapshotTimestamp = kuduRelationFromDataFrame( + dataFrameNoneSnapshotTimestamp) + assert(kuduRelationSnapshotTimestamp.readOptions.snapshotTimestampMs.contains(86400000000L)) + assert(kuduRelationNoneSnapshotTimestamp.readOptions.snapshotTimestampMs.isEmpty) + } + + @Test + def testReadDataFrameAtSnapshot() { + insertRows(table, 100, 1) + val timestamp = getLastPropagatedTimestampMs() + insertRows(table, 100, 100) + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.snapshotTimestampMs" -> s"$timestamp") + val dataFrameWithSnapshotTimestamp = sqlContext.read.options(kuduOptions).format("kudu").load + kuduOptions = + Map("kudu.table" -> tableName, "kudu.master" -> harness.getMasterAddressesAsString) + val dataFrameWithoutSnapshotTimestamp = sqlContext.read.options(kuduOptions).format("kudu").load + assertEquals(100, dataFrameWithSnapshotTimestamp.collect().length) + assertEquals(200, dataFrameWithoutSnapshotTimestamp.collect().length) + } + + @Test + def testSnapshotTimestampBeyondMaxAge(): Unit = { + val extraConfigs = new util.HashMap[String, String]() + val tableName = "snapshot_test" + extraConfigs.put("kudu.table.history_max_age_sec", "1"); + kuduClient.createTable( + tableName, + schema, + tableOptions.setExtraConfigs(extraConfigs) + ) + val timestamp = getLastPropagatedTimestampMs() + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.snapshotTimestampMs" -> s"$timestamp") + insertRows(table, 100, 1) + Thread.sleep(2000) + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val exception = intercept[Exception] { + df.count() + } + assertTrue( + exception.getMessage.contains( + "snapshot scan end timestamp is earlier than the ancient history mark") + ) + } + + @Test + def testSnapshotTimestampBeyondCurrentTimestamp(): Unit = { + val timestamp = getLastPropagatedTimestampMs() + 100000 + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> 
harness.getMasterAddressesAsString, + "kudu.snapshotTimestampMs" -> s"$timestamp") + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val exception = intercept[Exception] { + df.count() + } + assertTrue(exception.getMessage.contains("cannot verify timestamp")) + } + + @Test + @MasterServerConfig( + flags = Array( + "--mock_table_metrics_for_testing=true", + "--on_disk_size_for_testing=1024", + "--live_row_count_for_testing=100" + )) + def testGetTableStatistics(): Unit = { + val dataFrame = sqlContext.read.options(kuduOptions).format("kudu").load + val kuduRelation = kuduRelationFromDataFrame(dataFrame) + assert(kuduRelation.sizeInBytes == 1024) + } + + @Test + @EnableKerberos(principal = "oryx") + def testNonDefaultPrincipal(): Unit = { + KuduClientCache.clearCacheForTests() + val exception = intercept[Exception] { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + df.count() + } + assertTrue(exception.getCause.getMessage.contains("this client is not authenticated")) + + KuduClientCache.clearCacheForTests() + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.saslProtocolName" -> "oryx" + ) + + val df = sqlContext.read.options(kuduOptions).format("kudu").load + assertEquals(rowCount, df.count()) + } + + @Test + def testKuduRequireAuthenticationInsecureCluster(): Unit = { + KuduClientCache.clearCacheForTests() + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.requireAuthentication" -> "true" + ) + val exception = intercept[Exception] { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + df.count + } + assertTrue( + exception.getCause.getMessage + .contains("client requires authentication, but server does not have Kerberos enabled")) + } + + @Test + @MasterServerConfig(flags = Array("--rpc_encryption=disabled", "--rpc_authentication=disabled")) + @TabletServerConfig(flags = Array("--rpc_encryption=disabled", "--rpc_authentication=disabled")) + def testKuduRequireEncryptionInsecureCluster(): Unit = { + KuduClientCache.clearCacheForTests() + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.encryptionPolicy" -> "required_remote" + ) + val exception = intercept[Exception] { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + df.count + } + assertTrue( + exception.getCause.getMessage.contains("server does not support required TLS encryption")) + } + + @Test + @EnableKerberos + def testKuduRequireAuthenticationAndEncryptionSecureCluster(): Unit = { + KuduClientCache.clearCacheForTests() + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.encryptionPolicy" -> "required", + "kudu.requireAuthentication" -> "true" + ) + + val df = sqlContext.read.options(kuduOptions).format("kudu").load + assertEquals(rowCount, df.count) + } + + @Test + @MasterServerConfig(flags = Array("--rpc_encryption=disabled", "--rpc_authentication=disabled")) + @TabletServerConfig(flags = Array("--rpc_encryption=disabled", "--rpc_authentication=disabled")) + def testKuduInsecureCluster(): Unit = { + KuduClientCache.clearCacheForTests() + + val df = sqlContext.read.options(kuduOptions).format("kudu").load + assertEquals(rowCount, df.count) + } +} diff --git a/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduContextTest.scala 
b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduContextTest.scala new file mode 100644 index 0000000000..d70101632e --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduContextTest.scala @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import java.io.ByteArrayInputStream +import java.io.ByteArrayOutputStream +import java.io.ObjectInputStream +import java.io.ObjectOutputStream +import java.math.BigDecimal +import java.nio.charset.StandardCharsets.UTF_8 +import java.sql.Date +import java.sql.Timestamp + +import org.apache.kudu.util.DateUtil +import org.apache.kudu.util.TimestampUtil +import org.apache.spark.sql.functions.decode +import org.junit.Test +import org.scalatest.matchers.should.Matchers + +class KuduContextTest extends KuduTestSuite with Matchers { + val rowCount = 10 + + private def serialize(value: Any): Array[Byte] = { + val stream: ByteArrayOutputStream = new ByteArrayOutputStream() + val oos = new ObjectOutputStream(stream) + try { + oos.writeObject(value) + stream.toByteArray + } finally { + oos.close() + } + } + + private def deserialize(bytes: Array[Byte]): Any = { + val ois = new ObjectInputStream(new ByteArrayInputStream(bytes)) + try { + ois.readObject + } finally { + ois.close() + } + } + + @Test + def testKuduContextSerialization() { + val serialized = serialize(kuduContext) + KuduClientCache.clearCacheForTests() + val deserialized = deserialize(serialized).asInstanceOf[KuduContext] + assert(deserialized.authnCredentials != null) + // Make a nonsense call just to make sure the re-hydrated client works. 
+ deserialized.tableExists("foo") + } + + @Test + def testBasicKuduRDD() { + val rows = insertRows(table, rowCount) + val scanList = kuduContext + .kuduRDD( + ss.sparkContext, + "test", + Seq( + "key", + "c1_i", + "c2_s", + "c3_double", + "c4_long", + "c5_bool", + "c6_short", + "c7_float", + "c8_binary", + "c9_unixtime_micros", + "c10_byte", + "c11_decimal32", + "c12_decimal64", + "c13_decimal128", + "c14_varchar", + "c15_date" + ) + ) + .map(r => r.toSeq) + .collect() + scanList.foreach(r => { + val index = r.apply(0).asInstanceOf[Int] + assert(r.apply(0).asInstanceOf[Int] == rows.apply(index)._1) + assert(r.apply(1).asInstanceOf[Int] == rows.apply(index)._2) + assert(r.apply(2).asInstanceOf[String] == rows.apply(index)._3) + assert(r.apply(3).asInstanceOf[Double] == rows.apply(index)._2.toDouble) + assert(r.apply(4).asInstanceOf[Long] == rows.apply(index)._2.toLong) + assert(r.apply(5).asInstanceOf[Boolean] == (rows.apply(index)._2 % 2 == 1)) + assert(r.apply(6).asInstanceOf[Short] == rows.apply(index)._2.toShort) + assert(r.apply(7).asInstanceOf[Float] == rows.apply(index)._2.toFloat) + val binaryBytes = s"bytes ${rows.apply(index)._2}".getBytes(UTF_8).toSeq + assert(r.apply(8).asInstanceOf[Array[Byte]].toSeq == binaryBytes) + assert( + r.apply(9).asInstanceOf[Timestamp] == + TimestampUtil.microsToTimestamp(rows.apply(index)._4)) + assert(r.apply(10).asInstanceOf[Byte] == rows.apply(index)._2.toByte) + assert(r.apply(11).asInstanceOf[BigDecimal] == BigDecimal.valueOf(rows.apply(index)._2)) + assert(r.apply(12).asInstanceOf[BigDecimal] == BigDecimal.valueOf(rows.apply(index)._2)) + assert(r.apply(13).asInstanceOf[BigDecimal] == BigDecimal.valueOf(rows.apply(index)._2)) + assert(r.apply(14).asInstanceOf[String] == rows.apply(index)._3) + assert(r.apply(15).asInstanceOf[Date] == DateUtil.epochDaysToSqlDate(rows.apply(index)._2)) + }) + } + + @Test + def testKuduSparkDataFrame() { + insertRows(table, rowCount) + val sqlContext = ss.sqlContext + val dataDF = sqlContext.read + .options(Map("kudu.master" -> harness.getMasterAddressesAsString, "kudu.table" -> "test")) + .format("kudu") + .load + dataDF + .sort("key") + .select("c8_binary") + .first + .get(0) + .asInstanceOf[Array[Byte]] + .shouldBe("bytes 0".getBytes(UTF_8)) + // decode the binary to string and compare + dataDF + .sort("key") + .withColumn("c8_binary", decode(dataDF("c8_binary"), "UTF-8")) + .select("c8_binary") + .first + .get(0) + .shouldBe("bytes 0") + } +} diff --git a/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduRDDTest.scala b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduRDDTest.scala new file mode 100644 index 0000000000..c17edfc4e2 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduRDDTest.scala @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import scala.collection.JavaConverters._ +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder +import org.apache.kudu.client.CreateTableOptions +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.test.KuduTestHarness.TabletServerConfig +import org.apache.spark.SparkException +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.junit.Assert.assertEquals +import org.junit.Assert.fail +import org.junit.Test + +class KuduRDDTest extends KuduTestSuite { + + @Test + def testCollectRows() { + insertRows(table, 100) + val rdd = kuduContext.kuduRDD(ss.sparkContext, tableName, List("key")) + assertEquals(100, rdd.collect().length) + assertEquals(100L, rdd.asInstanceOf[KuduRDD].rowsRead.value) + } + + @Test + @TabletServerConfig( + // Hard coded values because Scala doesn't handle array constants in annotations. + flags = Array( + "--scanner_ttl_ms=5000", + "--scanner_gc_check_interval_us=500000" // 10% of the TTL. + )) + def testKeepAlive() { + val rowCount = 500 + val shortScannerTtlMs = 5000L + + // Create a simple table with a single partition. + val tableName = "testKeepAlive" + val tableSchema = { + val columns = List( + new ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchemaBuilder("val", Type.INT32).build()).asJava + new Schema(columns) + } + val tableOptions = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1) + val table = kuduClient.createTable(tableName, tableSchema, tableOptions) + + val session = kuduClient.newSession() + Range(0, rowCount).map { i => + val insert = table.newInsert + val row = insert.getRow + row.addInt(0, i) + row.addInt(1, i) + session.apply(insert) + } + session.flush() + + def processRDD(rdd: RDD[Row]): Unit = { + // Ensure reading takes longer than the scanner ttl. + var i = 0 + rdd.foreach { row => + // Sleep for half the ttl for the first few rows. This ensures + // we are on the same tablet and will go past the ttl without + // a new scan request. It also ensures a single row doesn't go + // longer than the ttl. + if (i < 5) { + Thread.sleep(shortScannerTtlMs / 2L) // Sleep for half the ttl. + i = i + 1 + } + } + } + + // Test that a keepAlivePeriodMs less than the scanner ttl is successful. + val goodRdd = kuduContext.kuduRDD( + ss.sparkContext, + tableName, + List("key"), + KuduReadOptions( + batchSize = 100, // Set a small batch size so the first scan doesn't read all the rows. + keepAlivePeriodMs = shortScannerTtlMs / 4L) + ) + processRDD(goodRdd) + + // Test that a keepAlivePeriodMs greater than the scanner ttl fails. + val badRdd = kuduContext.kuduRDD( + ss.sparkContext, + tableName, + List("key"), + KuduReadOptions( + batchSize = 100, // Set a small batch size so the first scan doesn't read all the rows. 
+ keepAlivePeriodMs = shortScannerTtlMs * 2L) + ) + try { + processRDD(badRdd) + fail("Should throw a scanner not found exception") + } catch { + case ex: SparkException => + assert(ex.getMessage.matches("(?s).*Scanner .* not found.*")) + } + } +} diff --git a/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduTestSuite.scala b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduTestSuite.scala new file mode 100644 index 0000000000..6f5c4fe43e --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/KuduTestSuite.scala @@ -0,0 +1,346 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import java.math.BigDecimal +import java.nio.charset.StandardCharsets.UTF_8 +import java.util.Date +import scala.collection.JavaConverters._ +import scala.collection.immutable.IndexedSeq +import org.apache.spark.SparkConf +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder +import org.apache.kudu.ColumnTypeAttributes.ColumnTypeAttributesBuilder +import org.apache.kudu.client.CreateTableOptions +import org.apache.kudu.client.KuduClient +import org.apache.kudu.client.KuduTable +import org.apache.kudu.client.RangePartitionBound +import org.apache.kudu.client.RangePartitionWithCustomHashSchema +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.test.KuduTestHarness +import org.apache.kudu.util.CharUtil +import org.apache.kudu.util.DateUtil +import org.apache.kudu.util.DecimalUtil +import org.apache.kudu.util.HybridTimeUtil +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.SparkSession +import org.junit.After +import org.junit.Before +import org.junit.Rule + +import scala.annotation.meta.getter + +trait KuduTestSuite { + var ss: SparkSession = _ + var kuduClient: KuduClient = _ + var table: KuduTable = _ + var kuduContext: KuduContext = _ + + val tableName: String = "test" + val owner: String = "testuser" + val simpleTableName: String = "simple-test" + val simpleAutoIncrementingTableName: String = "simple-auto-incrementing-test" + + lazy val schema: Schema = { + val columns = List( + new ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchemaBuilder("c1_i", Type.INT32).build(), + new ColumnSchemaBuilder("c2_s", Type.STRING).nullable(true).build(), + new ColumnSchemaBuilder("c3_double", Type.DOUBLE).build(), + new ColumnSchemaBuilder("c4_long", Type.INT64).build(), + new ColumnSchemaBuilder("c5_bool", Type.BOOL).build(), + new ColumnSchemaBuilder("c6_short", Type.INT16).build(), + new ColumnSchemaBuilder("c7_float", Type.FLOAT).build(), + new ColumnSchemaBuilder("c8_binary", Type.BINARY).build(), + new 
ColumnSchemaBuilder("c9_unixtime_micros", Type.UNIXTIME_MICROS) + .build(), + new ColumnSchemaBuilder("c10_byte", Type.INT8).build(), + new ColumnSchemaBuilder("c11_decimal32", Type.DECIMAL) + .typeAttributes( + new ColumnTypeAttributesBuilder() + .precision(DecimalUtil.MAX_DECIMAL32_PRECISION) + .build() + ) + .build(), + new ColumnSchemaBuilder("c12_decimal64", Type.DECIMAL) + .typeAttributes( + new ColumnTypeAttributesBuilder() + .precision(DecimalUtil.MAX_DECIMAL64_PRECISION) + .build() + ) + .build(), + new ColumnSchemaBuilder("c13_decimal128", Type.DECIMAL) + .typeAttributes( + new ColumnTypeAttributesBuilder() + .precision(DecimalUtil.MAX_DECIMAL128_PRECISION) + .build() + ) + .build(), + new ColumnSchemaBuilder("c14_varchar", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(CharUtil.MAX_VARCHAR_LENGTH)) + .nullable(true) + .build(), + new ColumnSchemaBuilder("c15_date", Type.DATE).build() + ) + new Schema(columns.asJava) + } + + lazy val simpleSchema: Schema = { + val columns = List( + new ColumnSchemaBuilder("key", Type.INT32).key(true).build(), + new ColumnSchemaBuilder("val", Type.STRING).nullable(true).build()).asJava + new Schema(columns) + } + + lazy val simpleAutoIncrementingSchema: Schema = { + val columns = List( + new ColumnSchemaBuilder("key", Type.INT32).nonUniqueKey(true).build(), + new ColumnSchemaBuilder("val", Type.STRING).nullable(true).build()).asJava + new Schema(columns) + } + + val tableOptions: CreateTableOptions = { + val bottom = schema.newPartialRow() // Unbounded. + val middle = schema.newPartialRow() + middle.addInt("key", 50) + val top = schema.newPartialRow() // Unbounded. + + new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .addRangePartition(bottom, middle) + .addRangePartition(middle, top) + .setOwner(owner) + .setNumReplicas(1) + } + + val tableOptionsWithCustomHashSchema: CreateTableOptions = { + val bottom = schema.newPartialRow() + bottom.addInt("key", 0) + val middle = schema.newPartialRow() + middle.addInt("key", 100) + val top = schema.newPartialRow() + top.addInt("key", 200) + + val columns = List("key").asJava + val partitionFirst = new RangePartitionWithCustomHashSchema( + bottom, + middle, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + partitionFirst.addHashPartitions(columns, 2, 0) + val partitionSecond = new RangePartitionWithCustomHashSchema( + middle, + top, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + partitionSecond.addHashPartitions(columns, 3, 0) + + new CreateTableOptions() + .setRangePartitionColumns(columns) + .addRangePartition(partitionFirst) + .addRangePartition(partitionSecond) + .addHashPartitions(columns, 4, 0) + .setOwner(owner) + .setNumReplicas(1) + } + + val tableOptionsWithTableAndCustomHashSchema: CreateTableOptions = { + val lowest = schema.newPartialRow() + lowest.addInt("key", 0) + val low = schema.newPartialRow() + low.addInt("key", 100) + val high = schema.newPartialRow() + high.addInt("key", 200) + val highest = schema.newPartialRow() + highest.addInt("key", 300) + + val columns = List("key").asJava + val partitionFirst = new RangePartitionWithCustomHashSchema( + lowest, + low, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + partitionFirst.addHashPartitions(columns, 2, 0) + val partitionSecond = new RangePartitionWithCustomHashSchema( + low, + high, + RangePartitionBound.INCLUSIVE_BOUND, + RangePartitionBound.EXCLUSIVE_BOUND) + partitionSecond.addHashPartitions(columns, 3, 0) + + 
new CreateTableOptions() + .setRangePartitionColumns(columns) + .addRangePartition(partitionFirst) + .addRangePartition(partitionSecond) + .addRangePartition(high, highest) + .addHashPartitions(columns, 4, 0) + .setOwner(owner) + .setNumReplicas(1) + } + + val appID: String = new Date().toString + math + .floor(math.random * 10E4) + .toLong + .toString + + val conf: SparkConf = new SparkConf() + .setMaster("local[*]") + .setAppName("test") + .set("spark.ui.enabled", "false") + .set("spark.app.id", appID) + + // Ensure the annotation is applied to the getter and not the field + // or else Junit will complain that the Rule must be public. + @(Rule @getter) + val harness = new KuduTestHarness() + + @Before + def setUpBase(): Unit = { + ss = SparkSession.builder().config(conf).getOrCreate() + kuduContext = new KuduContext( + harness.getMasterAddressesAsString, + ss.sparkContext, + None, + Some(harness.getPrincipal())) + + // Spark tests should use the client from the kuduContext. + kuduClient = kuduContext.syncClient + + table = kuduClient.createTable(tableName, schema, tableOptions) + + val simpleTableOptions = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1) + kuduClient.createTable(simpleTableName, simpleSchema, simpleTableOptions) + } + + @After + def tearDownBase() { + if (ss != null) ss.stop() + KuduClientCache.clearCacheForTests() + } + + def deleteRow(key: Int): Unit = { + val kuduSession = kuduClient.newSession() + val delete = table.newDelete() + delete.getRow.addInt(0, key) + kuduSession.apply(delete) + } + + def insertRows( + targetTable: KuduTable, + rowCount: Int, + startIndex: Int = 0): IndexedSeq[(Int, Int, String, Long)] = { + val kuduSession = kuduClient.newSession() + + val rows = Range(startIndex, rowCount + startIndex).map { i => + val insert = targetTable.newInsert + val row = insert.getRow + row.addInt(0, i) + row.addInt(1, i) + row.addDouble(3, i.toDouble) + row.addLong(4, i.toLong) + row.addBoolean(5, i % 2 == 1) + row.addShort(6, i.toShort) + row.addFloat(7, i.toFloat) + row.addBinary(8, s"bytes $i".getBytes(UTF_8)) + val ts = System.currentTimeMillis() * 1000 + row.addLong(9, ts) + row.addByte(10, i.toByte) + row.addDecimal(11, BigDecimal.valueOf(i)) + row.addDecimal(12, BigDecimal.valueOf(i)) + row.addDecimal(13, BigDecimal.valueOf(i)) + row.addDate(15, DateUtil.epochDaysToSqlDate(i)) + + // Sprinkling some nulls so that queries see them. 
+ val s = if (i % 2 == 0) { + row.addString(2, i.toString) + row.addVarchar(14, i.toString) + i.toString + } else { + row.setNull(2) + row.setNull(14) + null + } + + kuduSession.apply(insert) + (i, i, s, ts) + } + rows + } + + def getLastPropagatedTimestampMs(): Long = { + HybridTimeUtil + .HTTimestampToPhysicalAndLogical(kuduClient.getLastPropagatedTimestamp) + .head / 1000 + } + + def upsertRowsWithRowDataSize( + targetTable: KuduTable, + rowCount: Integer, + rowDataSize: Integer): IndexedSeq[(Int, Int, String, Long)] = { + val kuduSession = kuduClient.newSession() + + val rows = Range(0, rowCount).map { i => + val upsert = targetTable.newUpsert + val row = upsert.getRow + row.addInt(0, i) + row.addInt(1, i) + row.addDouble(3, i.toDouble) + row.addLong(4, i.toLong) + row.addBoolean(5, i % 2 == 1) + row.addShort(6, i.toShort) + row.addFloat(7, i.toFloat) + row.addBinary(8, (s"*" * rowDataSize).getBytes(UTF_8)) + val ts = System.currentTimeMillis() * 1000 + row.addLong(9, ts) + row.addByte(10, i.toByte) + row.addDecimal(11, BigDecimal.valueOf(i)) + row.addDecimal(12, BigDecimal.valueOf(i)) + row.addDecimal(13, BigDecimal.valueOf(i)) + row.addVarchar(14, i.toString) + row.addDate(15, DateUtil.epochDaysToSqlDate(i)) + + // Sprinkling some nulls so that queries see them. + val s = if (i % 2 == 0) { + row.addString(2, i.toString) + i.toString + } else { + row.setNull(2) + null + } + + kuduSession.apply(upsert) + (i, i, s, ts) + } + rows + } + + /** + * Assuming that the only part of the logical plan is a Kudu scan, this + * function extracts the KuduRelation from the passed DataFrame for + * testing purposes. + */ + def kuduRelationFromDataFrame(dataFrame: DataFrame) = { + val logicalPlan = dataFrame.queryExecution.logical + val logicalRelation = logicalPlan.asInstanceOf[LogicalRelation] + val baseRelation = logicalRelation.relation + baseRelation.asInstanceOf[KuduRelation] + } +} diff --git a/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/SparkListenerUtil.scala b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/SparkListenerUtil.scala new file mode 100644 index 0000000000..7221efeed7 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/SparkListenerUtil.scala @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
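The `kuduRelationFromDataFrame` helper above assumes the DataFrame's logical plan is nothing but a bare Kudu scan, so it casts the plan root directly to `LogicalRelation`. A minimal sketch of a more tolerant variant — relying only on the standard Spark `TreeNode.collect` API; the helper name is illustrative and not part of this patch — that walks the plan and picks out any `KuduRelation` nodes even when filters or projections wrap the scan:

[source,scala]
----
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.execution.datasources.LogicalRelation

// Collect every KuduRelation in the logical plan rather than assuming the
// root node is the scan itself.
def findKuduRelations(dataFrame: DataFrame): Seq[KuduRelation] =
  dataFrame.queryExecution.logical.collect {
    case lr: LogicalRelation if lr.relation.isInstanceOf[KuduRelation] =>
      lr.relation.asInstanceOf[KuduRelation]
  }
----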
+ +package org.apache.kudu.spark.kudu + +import org.apache.kudu.test.junit.AssertHelpers +import org.apache.kudu.test.junit.AssertHelpers.BooleanExpression +import org.apache.spark.SparkContext +import org.apache.spark.scheduler.SparkListener +import org.apache.spark.scheduler.SparkListenerJobEnd +import org.apache.spark.scheduler.SparkListenerJobStart +import org.apache.spark.scheduler.SparkListenerTaskEnd + +import scala.collection.mutable.ListBuffer + +object SparkListenerUtil { + + def withJobTaskCounter(sc: SparkContext)(body: () => Unit): Int = { + // Add a SparkListener to count the number of tasks that end. + var numTasks = 0 + val listener: SparkListener = new SparkListener { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + numTasks += 1 + } + } + withListener(sc, listener)(body) + numTasks + } + + def withJobDescriptionCollector(sc: SparkContext)(body: () => Unit): List[String] = { + // Add a SparkListener to collect the job descriptions. + val jobDescriptions = new ListBuffer[String]() + val listener: SparkListener = new SparkListener { + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + // TODO: Use SparkContext.SPARK_JOB_DESCRIPTION when public. + val description = jobStart.properties.getProperty("spark.job.description") + if (description != null) { + jobDescriptions += description + } + } + } + withListener(sc, listener)(body) + jobDescriptions.toList + } + + // TODO: Use org.apache.spark.TestUtils.withListener if it becomes public test API + def withListener[L <: SparkListener](sc: SparkContext, listener: L)(body: () => Unit): Unit = { + val jobDoneListener = new JobDoneListener + sc.addSparkListener(jobDoneListener) + sc.addSparkListener(listener) + try { + body() + } finally { + // Because the SparkListener events are processed on an async queue which is behind + // private API, we use the jobEnd event to know that all of the taskEnd events + // must have been processed. + AssertHelpers.assertEventuallyTrue("Spark job did not complete", new BooleanExpression { + override def get(): Boolean = jobDoneListener.isDone + }, 5000) + sc.removeSparkListener(listener) + sc.removeSparkListener(jobDoneListener) + } + } + + private class JobDoneListener extends SparkListener { + var jobDone = false + + override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { + jobDone = true + } + def isDone = jobDone + } +} diff --git a/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/SparkSQLTest.scala b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/SparkSQLTest.scala new file mode 100644 index 0000000000..654f167bdc --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/SparkSQLTest.scala @@ -0,0 +1,534 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.spark.kudu + +import scala.collection.JavaConverters._ +import scala.collection.immutable.IndexedSeq +import scala.util.control.NonFatal +import org.apache.spark.sql.SQLContext +import org.junit.Assert._ +import org.apache.kudu.ColumnSchema.ColumnSchemaBuilder +import org.apache.kudu.client.CreateTableOptions +import org.apache.kudu.Schema +import org.apache.kudu.Type +import org.apache.kudu.spark.kudu.SparkListenerUtil.withJobTaskCounter +import org.apache.kudu.test.KuduTestHarness.MasterServerConfig +import org.apache.kudu.test.KuduTestHarness.TabletServerConfig +import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec +import org.junit.Before +import org.junit.Test +import org.scalatest.matchers.should.Matchers + +class SparkSQLTest extends KuduTestSuite with Matchers { + val rowCount = 10 + var sqlContext: SQLContext = _ + var rows: IndexedSeq[(Int, Int, String, Long)] = _ + var kuduOptions: Map[String, String] = _ + + @Before + def setUp(): Unit = { + rows = insertRows(table, rowCount) + + sqlContext = ss.sqlContext + + kuduOptions = + Map("kudu.table" -> tableName, "kudu.master" -> harness.getMasterAddressesAsString) + + sqlContext.read + .options(kuduOptions) + .format("kudu") + .load() + .createOrReplaceTempView(tableName) + } + + @Test + def testBasicSparkSQL() { + val results = sqlContext.sql("SELECT * FROM " + tableName).collectAsList() + assert(results.size() == rowCount) + + assert(results.get(1).isNullAt(2)) + assert(!results.get(0).isNullAt(2)) + } + + @Test + def testBasicSparkSQLWithProjection() { + val results = sqlContext.sql("SELECT key FROM " + tableName).collectAsList() + assert(results.size() == rowCount) + assert(results.get(0).size.equals(1)) + assert(results.get(0).getInt(0).equals(0)) + } + + @Test + def testBasicSparkSQLWithPredicate() { + val results = sqlContext + .sql("SELECT key FROM " + tableName + " where key=1") + .collectAsList() + assert(results.size() == 1) + assert(results.get(0).size.equals(1)) + assert(results.get(0).getInt(0).equals(1)) + + } + + @Test + def testBasicSparkSQLWithTwoPredicates() { + val results = sqlContext + .sql("SELECT key FROM " + tableName + " where key=2 and c2_s='2'") + .collectAsList() + assert(results.size() == 1) + assert(results.get(0).size.equals(1)) + assert(results.get(0).getInt(0).equals(2)) + } + + @Test + def testBasicSparkSQLWithInListPredicate() { + val keys = Array(1, 5, 7) + val results = sqlContext + .sql(s"SELECT key FROM $tableName where key in (${keys.mkString(", ")})") + .collectAsList() + assert(results.size() == keys.length) + keys.zipWithIndex.foreach { + case (v, i) => + assert(results.get(i).size.equals(1)) + assert(results.get(i).getInt(0).equals(v)) + } + } + + @Test + def testBasicSparkSQLWithInListPredicateOnString() { + val keys = Array(1, 4, 6) + val results = sqlContext + .sql(s"SELECT key FROM $tableName where c2_s in (${keys.mkString("'", "', '", "'")})") + .collectAsList() + assert(results.size() == keys.count(_ % 2 == 0)) + keys.filter(_ % 2 == 0).zipWithIndex.foreach { + case (v, i) => + assert(results.get(i).size.equals(1)) + assert(results.get(i).getInt(0).equals(v)) + } + } + + @Test + def testBasicSparkSQLWithInListAndComparisonPredicate() { + val keys = Array(1, 5, 7) + val results = sqlContext + .sql(s"SELECT key FROM $tableName where key>2 and key in (${keys.mkString(", ")})") + .collectAsList() + assert(results.size() == keys.count(_ > 2)) + 
keys.filter(_ > 2).zipWithIndex.foreach { + case (v, i) => + assert(results.get(i).size.equals(1)) + assert(results.get(i).getInt(0).equals(v)) + } + } + + @Test + def testBasicSparkSQLWithTwoPredicatesNegative() { + val results = sqlContext + .sql("SELECT key FROM " + tableName + " where key=1 and c2_s='2'") + .collectAsList() + assert(results.size() == 0) + } + + @Test + def testBasicSparkSQLWithTwoPredicatesIncludingString() { + val results = sqlContext + .sql("SELECT key FROM " + tableName + " where c2_s='2'") + .collectAsList() + assert(results.size() == 1) + assert(results.get(0).size.equals(1)) + assert(results.get(0).getInt(0).equals(2)) + } + + @Test + def testBasicSparkSQLWithTwoPredicatesAndProjection() { + val results = sqlContext + .sql("SELECT key, c2_s FROM " + tableName + " where c2_s='2'") + .collectAsList() + assert(results.size() == 1) + assert(results.get(0).size.equals(2)) + assert(results.get(0).getInt(0).equals(2)) + assert(results.get(0).getString(1).equals("2")) + } + + @Test + def testBasicSparkSQLWithTwoPredicatesGreaterThan() { + val results = sqlContext + .sql("SELECT key, c2_s FROM " + tableName + " where c2_s>='2'") + .collectAsList() + assert(results.size() == 4) + assert(results.get(0).size.equals(2)) + assert(results.get(0).getInt(0).equals(2)) + assert(results.get(0).getString(1).equals("2")) + } + + @Test + def testSparkSQLStringStartsWithFilters() { + // This test requires a special table. + val testTableName = "startswith" + val schema = new Schema( + List(new ColumnSchemaBuilder("key", Type.STRING).key(true).build()).asJava) + val tableOptions = new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1) + val testTable = kuduClient.createTable(testTableName, schema, tableOptions) + + val kuduSession = kuduClient.newSession() + val chars = List('a', 'b', '乕', Char.MaxValue, '\u0000') + val keys = for { + x <- chars + y <- chars + z <- chars + w <- chars + } yield Array(x, y, z, w).mkString + keys.foreach { key => + val insert = testTable.newInsert + val row = insert.getRow + row.addString(0, key) + kuduSession.apply(insert) + } + val options: Map[String, String] = + Map("kudu.table" -> testTableName, "kudu.master" -> harness.getMasterAddressesAsString) + sqlContext.read.options(options).format("kudu").load.createOrReplaceTempView(testTableName) + + val checkPrefixCount = { prefix: String => + val results = sqlContext.sql(s"SELECT key FROM $testTableName WHERE key LIKE '$prefix%'") + assertEquals(keys.count(k => k.startsWith(prefix)), results.count()) + } + // empty string + checkPrefixCount("") + // one character + for (x <- chars) { + checkPrefixCount(Array(x).mkString) + } + // all two character combos + for { + x <- chars + y <- chars + } { + checkPrefixCount(Array(x, y).mkString) + } + } + + @Test + def testSparkSQLIsNullPredicate() { + var results = sqlContext + .sql("SELECT key FROM " + tableName + " where c2_s IS NULL") + .collectAsList() + assert(results.size() == 5) + + results = sqlContext + .sql("SELECT key FROM " + tableName + " where key IS NULL") + .collectAsList() + assert(results.isEmpty) + } + + @Test + def testSparkSQLIsNotNullPredicate() { + var results = sqlContext + .sql("SELECT key FROM " + tableName + " where c2_s IS NOT NULL") + .collectAsList() + assert(results.size() == 5) + + results = sqlContext + .sql("SELECT key FROM " + tableName + " where key IS NOT NULL") + .collectAsList() + assert(results.size() == 10) + } + + @Test + def testSQLInsertInto() { + val insertTable = "insertintotest" + + 
// read 0 rows just to get the schema + val df = sqlContext.sql(s"SELECT * FROM $tableName LIMIT 0") + kuduContext.createTable( + insertTable, + df.schema, + Seq("key"), + new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1)) + + val newOptions: Map[String, String] = + Map("kudu.table" -> insertTable, "kudu.master" -> harness.getMasterAddressesAsString) + sqlContext.read + .options(newOptions) + .format("kudu") + .load + .createOrReplaceTempView(insertTable) + + sqlContext.sql(s"INSERT INTO TABLE $insertTable SELECT * FROM $tableName") + val results = + sqlContext.sql(s"SELECT key FROM $insertTable").collectAsList() + assertEquals(10, results.size()) + } + + @Test + def testSQLInsertOverwriteUnsupported() { + val insertTable = "insertoverwritetest" + + // read 0 rows just to get the schema + val df = sqlContext.sql(s"SELECT * FROM $tableName LIMIT 0") + kuduContext.createTable( + insertTable, + df.schema, + Seq("key"), + new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1)) + + val newOptions: Map[String, String] = + Map("kudu.table" -> insertTable, "kudu.master" -> harness.getMasterAddressesAsString) + sqlContext.read + .options(newOptions) + .format("kudu") + .load + .createOrReplaceTempView(insertTable) + + try { + sqlContext.sql(s"INSERT OVERWRITE TABLE $insertTable SELECT * FROM $tableName") + fail("insert overwrite should throw UnsupportedOperationException") + } catch { + case _: UnsupportedOperationException => // good + case NonFatal(_) => + fail("insert overwrite should throw UnsupportedOperationException") + } + } + + @Test + def testTableScanWithProjection() { + assertEquals(10, sqlContext.sql(s"""SELECT key FROM $tableName""").count()) + } + + @Test + def testTableScanWithProjectionAndPredicateDouble() { + assertEquals( + rows.count { case (_, i, _, _) => i > 5 }, + sqlContext + .sql(s"""SELECT key, c3_double FROM $tableName where c3_double > "5.0"""") + .count()) + } + + @Test + def testTableScanWithProjectionAndPredicateLong() { + assertEquals( + rows.count { case (_, i, _, _) => i > 5 }, + sqlContext + .sql(s"""SELECT key, c4_long FROM $tableName where c4_long > "5"""") + .count()) + } + + @Test + def testTableScanWithProjectionAndPredicateBool() { + assertEquals( + rows.count { case (_, i, _, _) => i % 2 == 0 }, + sqlContext + .sql(s"""SELECT key, c5_bool FROM $tableName where c5_bool = true""") + .count()) + } + + @Test + def testTableScanWithProjectionAndPredicateShort() { + assertEquals( + rows.count { case (_, i, _, _) => i > 5 }, + sqlContext + .sql(s"""SELECT key, c6_short FROM $tableName where c6_short > 5""") + .count()) + + } + + @Test + def testTableScanWithProjectionAndPredicateFloat() { + assertEquals( + rows.count { case (_, i, _, _) => i > 5 }, + sqlContext + .sql(s"""SELECT key, c7_float FROM $tableName where c7_float > 5""") + .count()) + + } + + @Test + def testTableScanWithProjectionAndPredicateDecimal32() { + assertEquals( + rows.count { case (_, i, _, _) => i > 5 }, + sqlContext + .sql(s"""SELECT key, c11_decimal32 FROM $tableName where c11_decimal32 > 5""") + .count()) + } + + @Test + def testTableScanWithProjectionAndPredicateDecimal64() { + assertEquals( + rows.count { case (_, i, _, _) => i > 5 }, + sqlContext + .sql(s"""SELECT key, c12_decimal64 FROM $tableName where c12_decimal64 > 5""") + .count()) + } + + @Test + def testTableScanWithProjectionAndPredicateDecimal128() { + assertEquals( + rows.count { case (_, i, _, _) => i > 5 }, + sqlContext + .sql(s"""SELECT key, 
c13_decimal128 FROM $tableName where c13_decimal128 > 5""") + .count()) + } + + @Test + def testTableScanWithProjectionAndPredicate() { + assertEquals( + rows.count { case (_, _, s, _) => s != null && s > "5" }, + sqlContext + .sql(s"""SELECT key FROM $tableName where c2_s > "5"""") + .count()) + + assertEquals( + rows.count { case (_, _, s, _) => s != null }, + sqlContext + .sql(s"""SELECT key, c2_s FROM $tableName where c2_s IS NOT NULL""") + .count()) + } + + @Test + def testScanLocality() { + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.scanLocality" -> "closest_replica") + + val table = "scanLocalityTest" + sqlContext.read.options(kuduOptions).format("kudu").load.createOrReplaceTempView(table) + val results = sqlContext.sql(s"SELECT * FROM $table").collectAsList() + assert(results.size() == rowCount) + + assert(!results.get(0).isNullAt(2)) + assert(results.get(1).isNullAt(2)) + } + + @Test + def testTableNonFaultTolerantScan() { + val results = sqlContext.sql(s"SELECT * FROM $tableName").collectAsList() + assert(results.size() == rowCount) + + assert(!results.get(0).isNullAt(2)) + assert(results.get(1).isNullAt(2)) + } + + @Test + def testTableFaultTolerantScan() { + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.faultTolerantScan" -> "true") + + val table = "faultTolerantScanTest" + sqlContext.read.options(kuduOptions).format("kudu").load.createOrReplaceTempView(table) + val results = sqlContext.sql(s"SELECT * FROM $table").collectAsList() + assert(results.size() == rowCount) + + assert(!results.get(0).isNullAt(2)) + assert(results.get(1).isNullAt(2)) + } + + @Test + @TabletServerConfig( + flags = Array( + "--flush_threshold_mb=1", + "--flush_threshold_secs=1", + // Disable rowset compact to prevent DRSs being merged because they are too small. + "--enable_rowset_compaction=false" + )) + def testScanWithKeyRange() { + upsertRowsWithRowDataSize(table, rowCount * 100, 32 * 1024) + + // Wait for mrs flushed + Thread.sleep(5 * 1000) + + kuduOptions = Map( + "kudu.table" -> tableName, + "kudu.master" -> harness.getMasterAddressesAsString, + "kudu.splitSizeBytes" -> "1024") + + // count the number of tasks that end. + val actualNumTasks = withJobTaskCounter(ss.sparkContext) { () => + val t = "scanWithKeyRangeTest" + sqlContext.read.options(kuduOptions).format("kudu").load.createOrReplaceTempView(t) + val results = sqlContext.sql(s"SELECT * FROM $t").collectAsList() + assertEquals(rowCount * 100, results.size()) + } + assert(actualNumTasks > 2) + } + + @Test + @MasterServerConfig( + flags = Array( + "--mock_table_metrics_for_testing=true", + "--on_disk_size_for_testing=1024", + "--live_row_count_for_testing=100" + )) + def testJoinWithTableStatistics(): Unit = { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + + // 1. Create two tables. 
+ val table1 = "table1" + kuduContext.createTable( + table1, + df.schema, + Seq("key"), + new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1)) + val options1: Map[String, String] = + Map("kudu.table" -> table1, "kudu.master" -> harness.getMasterAddressesAsString) + df.write.options(options1).mode("append").format("kudu").save + val df1 = sqlContext.read.options(options1).format("kudu").load + df1.createOrReplaceTempView(table1) + + val table2 = "table2" + kuduContext.createTable( + table2, + df.schema, + Seq("key"), + new CreateTableOptions() + .setRangePartitionColumns(List("key").asJava) + .setNumReplicas(1)) + val options2: Map[String, String] = + Map("kudu.table" -> table2, "kudu.master" -> harness.getMasterAddressesAsString) + df.write.options(options2).mode("append").format("kudu").save + val df2 = sqlContext.read.options(options2).format("kudu").load + df2.createOrReplaceTempView(table2) + + // 2. Get the table statistics of each table and verify. + val relation1 = kuduRelationFromDataFrame(df1) + val relation2 = kuduRelationFromDataFrame(df2) + assert(relation1.sizeInBytes == relation2.sizeInBytes) + assert(relation1.sizeInBytes == 1024) + + // 3. Test join with table size should be able to broadcast. + val sqlStr = s"SELECT * FROM $table1 JOIN $table2 ON $table1.key = $table2.key" + val physical = sqlContext.sql(sqlStr).queryExecution.sparkPlan + val operators = physical.collect { + case j: BroadcastHashJoinExec => j + } + assert(operators.size == 1) + + // Verify result. + val results = sqlContext.sql(sqlStr).collectAsList() + assert(results.size() == rowCount) + } +} diff --git a/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/StreamingTest.scala b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/StreamingTest.scala new file mode 100644 index 0000000000..1e2eaab2e7 --- /dev/null +++ b/java-scala-spark4/kudu-spark/src/test/scala/org/apache/kudu/spark/kudu/StreamingTest.scala @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
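The join test above relies on Spark using the table statistics reported through `KuduRelation.sizeInBytes` to pick a broadcast hash join. A hedged sketch of the same physical-plan check as a reusable predicate — the helper name is illustrative; `spark.sql.autoBroadcastJoinThreshold` is the standard Spark setting (10 MB by default) that the mocked 1024-byte table size stays below:

[source,scala]
----
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec

// True when the planned query contains at least one broadcast hash join.
// Spark only chooses this strategy when one side's estimated size is under
// spark.sql.autoBroadcastJoinThreshold, so an accurate sizeInBytes matters.
def usesBroadcastHashJoin(df: DataFrame): Boolean =
  df.queryExecution.sparkPlan.collect { case j: BroadcastHashJoinExec => j }.nonEmpty
----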
+ +package org.apache.kudu.spark.kudu + +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.streaming.OutputMode +import org.junit.Assert.assertEquals +import org.junit.Before +import org.junit.Test + +class StreamingTest extends KuduTestSuite { + + implicit var sqlContext: SQLContext = _ + var kuduOptions: Map[String, String] = _ + + @Before + def setUp(): Unit = { + sqlContext = ss.sqlContext + kuduOptions = + Map("kudu.table" -> simpleTableName, "kudu.master" -> harness.getMasterAddressesAsString) + } + + @Test + def testKuduContextWithSparkStreaming() { + val spark = ss + import spark.implicits._ + val checkpointDir = java.nio.file.Files.createTempDirectory("spark_kudu") + val input = MemoryStream[Int] + val query = input + .toDS() + .map(v => (v + 1, v.toString)) + .toDF("key", "val") + .writeStream + .format("kudu") + .option("kudu.master", harness.getMasterAddressesAsString) + .option("kudu.table", simpleTableName) + .option("checkpointLocation", checkpointDir.toFile.getCanonicalPath) + .outputMode(OutputMode.Update) + .start() + + def verifyOutput(expectedData: Seq[(Int, String)]): Unit = { + val df = sqlContext.read.options(kuduOptions).format("kudu").load + val actual = df.rdd + .map { row => + (row.get(0), row.getString(1)) + } + .collect() + .toSet + assertEquals(actual, expectedData.toSet) + } + input.addData(1, 2, 3) + query.processAllAvailable() + verifyOutput(expectedData = Seq((2, "1"), (3, "2"), (4, "3"))) + query.stop() + } +} diff --git a/java-scala-spark4/kudu-subprocess/build.gradle b/java-scala-spark4/kudu-subprocess/build.gradle new file mode 100644 index 0000000000..1f8745ab4a --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/build.gradle @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +apply from: "$rootDir/gradle/shadow.gradle" + +// Explicitly allow slf4j to be included in this jar. +shadowIncludeSlf4j = true + +dependencies { + compile project(path: ":kudu-proto") + compile(libs.hadoopCommon) { + // Exclude io.netty to ensure we use our own versions. + // The dependency on io.netty is a transitive one through the chain + // hadoop-common --> hadoop-auth --> zookeeper --> io.netty.netty-xxx + exclude group: "io.netty" + // hadoopCommon and rangerPlugin use different versions of jersey. + exclude group: "com.sun.jersey" + // Exclude log4j and slf4j to ensure we use our own versions. + exclude group: "log4j" + exclude group: "org.slf4j" + } + compile libs.protobufJavaUtil + + compile(libs.rangerPlugin) { + // rangerPlugin depends on kafka which includes different versions + // of jersey than rangerPlugin. + exclude group: "org.apache.kafka" + // Exclude log4j and slf4j to ensure we use our own versions. 
+ exclude group: "log4j" + exclude group: "org.slf4j" + } + compile libs.slf4jApi + compile libs.log4jApi + compile libs.log4jCore + compile libs.log4jCompat + compile libs.log4jSlf4jImpl + + // Compiling exact modules from the io.netty group for hadoop-common: + // for some reason, if compiling just the libs.netty umbrella package + // (i.e. io.netty:netty-all), hadoop-common still depends on its own + // version of io.netty:netty-transport-native-epoll. + compile libs.nettyBuffer + compile libs.nettyCommon + compile libs.nettyHandler + compile libs.nettyTransport + compile libs.nettyTransportEpoll + compile libs.nettyTransportUnix + + optional libs.jsr305 + optional libs.yetusAnnotations + + testCompile project(path: ":kudu-test-utils", configuration: "shadow") + testCompile libs.junit + testCompile libs.mockitoCore +} + +// kudu-subprocess has no public Javadoc. +javadoc { + enabled = false +} + +// Skip publishing kudu-subprocess artifact because it's not intended for external use. +uploadArchives.enabled = false +install.enabled = false diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/InboundRequest.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/InboundRequest.java new file mode 100644 index 0000000000..23a9e0ba60 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/InboundRequest.java @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +/** + * Encapsulates a request on the inbound queue. It is expected that the + * SubprocessMetrics have begun timing the time it takes to make + * it through the queue before creating this and putting it onto the queue. + */ +public class InboundRequest { + private final byte[] bytes; + private SubprocessMetrics metrics; + + // TODO(awong): it might be nice if both the request and response spoke the + // same language (e.g. both byte arrays or both protobuf messages). 
+ public InboundRequest(byte[] bytes, SubprocessMetrics metrics) { + this.bytes = bytes; + this.metrics = metrics; + } + + /** + * @return the bytes associated with this request + */ + public byte[] bytes() { + return bytes; + } + + /** + * @return the metrics associated with this request + */ + public SubprocessMetrics metrics() { + return metrics; + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/KuduSubprocessException.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/KuduSubprocessException.java new file mode 100644 index 0000000000..53e11b699f --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/KuduSubprocessException.java @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Any runtime exception(s) thrown by a subprocess. + */ +@InterfaceAudience.Private +public final class KuduSubprocessException extends RuntimeException { + + /** + * Constructs a new runtime exception with the specified detail + * message. + * + * @param message the detail message + */ + public KuduSubprocessException(String message) { + super(message); + } + + /** + * Constructs a new runtime exception with the specified detail + * message and cause. + * + * @param message the detail message + * @param cause the cause + */ + public KuduSubprocessException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageIO.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageIO.java new file mode 100644 index 0000000000..fafa37efce --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageIO.java @@ -0,0 +1,186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
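+
+// Framing used on the pipe, as implemented by readBytes() and writeMessage()
+// below (one frame per protobuf message):
+//
+//   [4-byte message size, big endian][serialized protobuf body of that size]
+//
+// For example, intToBytes(5) yields {0x00, 0x00, 0x00, 0x05}, which is then
+// followed by the 5-byte body.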
+ +package org.apache.kudu.subprocess; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Bytes; +import com.google.protobuf.Message; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Util class for reading and writing protobuf messages. + */ +@InterfaceAudience.Private +public class MessageIO { + private final int maxMessageBytes; + private final BufferedInputStream in; + private final BufferedOutputStream out; + + public MessageIO(int maxMessageBytes, + BufferedInputStream in, + BufferedOutputStream out) { + this.maxMessageBytes = maxMessageBytes; + this.in = in; + this.out = out; + } + + /** + * Reads a protobuf message, if any, from the underlying buffered input + * stream. The read is blocking and not atomic (partial reads can occur + * if exceptions occur). As such, users should ensure this is not called + * from multiple threads concurrently. + * + * @return the message in a byte array. + * @throws IOException if this input stream has been closed, an I/O + * error occurs, or fail to read the message + * properly + * @throws KuduSubprocessException if there was an oversized message + * in the stream + */ + @VisibleForTesting + byte[] readBytes() throws IOException { + Preconditions.checkNotNull(in); + // Read four bytes of the message to get the size of the body. + byte[] sizeBytes = new byte[Integer.BYTES]; + doRead(sizeBytes, Integer.BYTES); + int size = bytesToInt(sizeBytes); + if (size > maxMessageBytes) { + // Read out and discard the oversized message, so the channel is available + // for further communication. + doReadAndDiscard(size); + throw new KuduSubprocessException(String.format( + "message size (%d) exceeds maximum message size (%d): message is discarded", + size, maxMessageBytes)); + } + // Read the body based on the size. + byte[] dataBytes = new byte[size]; + doRead(dataBytes, size); + return dataBytes; + } + + /** + * Reads size bytes of data from the underlying buffered input + * stream into the specified byte array, starting at the offset 0. + * The reads are performed until we reach EOF of the stream (when the return + * value of the underlying read method is -1) or when we read more than or + * equal to the size bytes. + * If it fails to read the specified size, IOException is thrown. + * + * @throws IOException if this input stream has been closed, an I/O + * error occurs, or fail to read the specified size + */ + private void doRead(byte[] bytes, int size) throws IOException { + Preconditions.checkNotNull(bytes); + int totalRead = in.read(bytes, 0, size); + do { + int read = in.read(bytes, totalRead, size - totalRead); + if (read == -1) { + break; + } + totalRead += read; + } while (totalRead < size); + if (totalRead != size) { + throw new IOException( + String.format("unable to receive message, expected (%d) bytes " + + "but read (%d) bytes.", size, totalRead)); + } + } + + /** + * Reads size bytes of data from the underlying buffered input + * stream and discards all the bytes read. The reads are performed until we + * reach EOF of the stream (when the return value of the underlying read + * method is -1) or when we read more than or equal to the + * size bytes. + * If it fails to read the specified size, IOException is thrown. 
+ * + * @throws IOException if this input stream has been closed, an I/O + * error occurs, or fail to read the specified size + */ + private void doReadAndDiscard(int size) throws IOException { + byte[] buf = new byte[4096]; + int rem = size; + int toRead = Math.min(4096, rem); + do { + int read = in.read(buf, 0, toRead); + if (read == -1) { + break; + } + rem -= read; + toRead = Math.min(4096, rem); + } while (rem > 0); + if (rem > 0) { + throw new IOException( + String.format("unable to read next chunk of oversized message (%d bytes), " + + "expected %d bytes but read %d bytes", size, size, size - rem)); + } + } + + /** + * Writes a protobuf message to the buffered output stream. Since we flush + * after writing each message, with the underlying buffer size being the + * maximum bytes of a message, the write is atomic. That is if any exceptions + * occur, no partial message will be written to the underlying output stream. + * + * @param message the protobuf message + * @throws IOException if an I/O error occurs + */ + @VisibleForTesting + void writeMessage(Message message) throws IOException { + Preconditions.checkNotNull(out); + byte[] size = intToBytes(message.getSerializedSize()); + byte[] body = message.toByteArray(); + synchronized (out) { + out.write(Bytes.concat(size, body)); + // Always do a flush after write to ensure no partial message is written. + out.flush(); + } + } + + /** + * Converts a four-byte array in big endian order to a 32-bit integer. + * @param data a four-byte array in big endian order + * @return a 32-bit integer + */ + static int bytesToInt(byte[] data) { + return ByteBuffer.wrap(data) + .order(ByteOrder.BIG_ENDIAN) + .getInt(); + } + + /** + * Converts a 32-bit integer to a four bytes array in big endian order. + * @param value a 32-bit integer + * @return a four bytes array in big endian order + */ + @VisibleForTesting + public static byte[] intToBytes(int value) { + return ByteBuffer.allocate(Integer.BYTES) + .order(ByteOrder.BIG_ENDIAN) + .putInt(value) + .array(); + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageParser.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageParser.java new file mode 100644 index 0000000000..8f727bcc07 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageParser.java @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
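+
+// Where this class sits in the subprocess pipeline (see SubprocessExecutor):
+//
+//   stdin --> MessageReader --> inboundQueue --> MessageParser (N threads)
+//         --> outboundQueue --> MessageWriter --> stdout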
+ +package org.apache.kudu.subprocess; + +import java.nio.charset.StandardCharsets; +import java.util.concurrent.BlockingQueue; + +import com.google.common.base.Preconditions; +import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.WireProtocol.AppStatusPB; +import org.apache.kudu.subprocess.Subprocess.SubprocessRequestPB; +import org.apache.kudu.subprocess.Subprocess.SubprocessResponsePB; + +/** + * The {@link MessageParser} class, + * 1. retrieves one message from the inbound queue at a time, + * 2. processes the message and generates a response, + * 3. and then puts the response to the outbound queue. + */ +@InterfaceAudience.Private +class MessageParser implements Runnable { + private static final Logger LOG = LoggerFactory.getLogger(MessageParser.class); + private final BlockingQueue inboundQueue; + private final BlockingQueue outboundQueue; + private final ProtocolHandler protocolHandler; + + MessageParser(BlockingQueue inboundQueue, + BlockingQueue outboundQueue, + ProtocolHandler protocolHandler) { + Preconditions.checkNotNull(inboundQueue); + Preconditions.checkNotNull(outboundQueue); + this.inboundQueue = inboundQueue; + this.outboundQueue = outboundQueue; + this.protocolHandler = protocolHandler; + } + + @Override + public void run() { + while (true) { + InboundRequest req = QueueUtil.take(inboundQueue); + SubprocessMetrics metrics = req.metrics(); + metrics.recordInboundQueueTimeMs(); + + // Record the execution time. + metrics.startTimer(); + SubprocessResponsePB.Builder responseBuilder = parseAndExecuteRequest(req.bytes()); + metrics.recordExecutionTimeMs(); + + // Begin recording the time it takes to make it through the outbound + // queue. The writer thread will record the elapsed time right before + // writing the response to the pipe. + metrics.startTimer(); + QueueUtil.put(outboundQueue, new OutboundResponse(responseBuilder, metrics)); + } + } + + /** + * Returns a response builder with the given error status. + * + * @param errorCode the given error status + * @param resp the message builder + * @return a message with the given error status + */ + static SubprocessResponsePB.Builder builderWithError(AppStatusPB.ErrorCode errorCode, + SubprocessResponsePB.Builder resp) { + Preconditions.checkNotNull(resp); + AppStatusPB.Builder errorBuilder = AppStatusPB.newBuilder(); + errorBuilder.setCode(errorCode); + resp.setError(errorBuilder); + return resp; + } + + /** + * Parses the given protobuf request and executes it, returning a builder for + * the response. If a InvalidProtocolBufferException is thrown, which + * indicates the message is invalid, the builder will contain an error + * message. + * + * @param data the protobuf message + * @return a SubprocessResponsePB + */ + private SubprocessResponsePB.Builder parseAndExecuteRequest(byte[] data) { + SubprocessResponsePB.Builder responseBuilder = SubprocessResponsePB.newBuilder(); + try { + // Parses the data as a message of SubprocessRequestPB type. 
+ SubprocessRequestPB request = SubprocessRequestPB.parser().parseFrom(data); + responseBuilder = protocolHandler.unpackAndExecuteRequest(request); + } catch (InvalidProtocolBufferException e) { + LOG.warn(String.format("%s: %s", "Unable to parse the protobuf message", + new String(data, StandardCharsets.UTF_8)), e); + responseBuilder = builderWithError(AppStatusPB.ErrorCode.ILLEGAL_STATE, responseBuilder); + } + return responseBuilder; + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageReader.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageReader.java new file mode 100644 index 0000000000..99a4dd722a --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageReader.java @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import java.io.EOFException; +import java.io.IOException; +import java.util.concurrent.BlockingQueue; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The {@link MessageReader} class, + * 1. processes a message that reads from the underlying input stream. + * 2. and then puts it to the inbound message queue. + * + * Since {@link MessageIO#readBytes()} is not atomic, the implementation + * of MessageReader is not thread-safe, and thus MessageReader should not + * be called concurrently unless handled by the caller. + */ +@InterfaceAudience.Private +class MessageReader implements Runnable { + private static final Logger LOG = LoggerFactory.getLogger(MessageReader.class); + private final BlockingQueue inboundQueue; + private final MessageIO messageIO; + private final boolean injectInterrupt; + + MessageReader(BlockingQueue inboundQueue, + MessageIO messageIO, + boolean injectInterrupt) { + Preconditions.checkNotNull(inboundQueue); + this.inboundQueue = inboundQueue; + this.messageIO = messageIO; + this.injectInterrupt = injectInterrupt; + } + + @Override + public void run() { + // Inject InterruptedException if needed (for tests only). + if (injectInterrupt) { + Thread.currentThread().interrupt(); + } + while (true) { + // Read the message from the standard input. If fail to read the + // message properly, IOException is thrown. IOException is fatal, + // and should be propagated up the call stack. Retry on IOException + // is not necessary as the error can happen in the middle of message + // reading. 
+ byte[] data; + try { + data = messageIO.readBytes(); + } catch (KuduSubprocessException e) { + LOG.error("{}: continuing", e.getMessage()); + continue; + } catch (IOException e) { + throw new KuduSubprocessException("Unable to read the protobuf message", e); + } + + // Log a warning for empty message which is not expected. + if (data.length == 0) { + LOG.warn("Empty message received."); + continue; + } + SubprocessMetrics metrics = new SubprocessMetrics(inboundQueue); + // Begin recording the time it takes to make it through the inbound + // queue. A parser thread will record the elapsed time right after + // it pulls the request from the queue. + metrics.startTimer(); + QueueUtil.put(inboundQueue, new InboundRequest(data, metrics)); + } + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageWriter.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageWriter.java new file mode 100644 index 0000000000..b5b1c77f00 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/MessageWriter.java @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import java.io.IOException; +import java.util.concurrent.BlockingQueue; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * The {@link MessageWriter} class, + * 1. retrieves one message from the outbound queue at a time, + * 2. and then writes the response to the underlying output stream. + */ +@InterfaceAudience.Private +class MessageWriter implements Runnable { + private final BlockingQueue outboundQueue; + private final MessageIO messageIO; + private final long blockWriteMs; + + MessageWriter(BlockingQueue outboundQueue, + MessageIO messageIO, + long blockWriteMs) { + Preconditions.checkNotNull(outboundQueue); + Preconditions.checkNotNull(messageIO); + this.outboundQueue = outboundQueue; + this.messageIO = messageIO; + this.blockWriteMs = blockWriteMs; + } + + @Override + public void run() { + while (true) { + OutboundResponse resp = QueueUtil.take(outboundQueue); + resp.metrics().recordOutboundQueueTimeMs(); + + // Write the response to the underlying output stream. IOException is fatal, + // and should be propagated up the call stack. + try { + // Block the write for the given milliseconds if needed (for tests only). + // -1 means the write will not be blocked. 
+ if (blockWriteMs != -1) { + Thread.sleep(blockWriteMs); + } + messageIO.writeMessage(resp.buildRespPB(outboundQueue)); + } catch (IOException | InterruptedException e) { + throw new KuduSubprocessException("Unable to write the protobuf message", e); + } + } + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/OutboundResponse.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/OutboundResponse.java new file mode 100644 index 0000000000..b171a1c913 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/OutboundResponse.java @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import java.util.concurrent.BlockingQueue; + +/** + * Encapsulates a response on the outbound queue. It is expected that the + * SubprocessMetrics have begun timing the time it takes to + * make it through the queue. + */ +public class OutboundResponse { + private final Subprocess.SubprocessResponsePB.Builder respBuilder; + private final SubprocessMetrics metrics; + + public OutboundResponse(Subprocess.SubprocessResponsePB.Builder respBuilder, + SubprocessMetrics metrics) { + this.respBuilder = respBuilder; + this.metrics = metrics; + } + + /** + * Builds the final SubprocessResponsePB to send over the pipe. + * This constructs the SubprocessMetricsPB as well, and expects + * that all queue timings have already been recorded. + * @param outboundQueue + * @return the response + */ + public Subprocess.SubprocessResponsePB buildRespPB( + BlockingQueue outboundQueue) { + respBuilder.setMetrics(metrics.buildMetricsPB(outboundQueue)); + return respBuilder.build(); + } + + /** + * @return the metrics associated with this response + */ + public SubprocessMetrics metrics() { + return metrics; + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ProtocolHandler.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ProtocolHandler.java new file mode 100644 index 0000000000..ac9d44b3ad --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ProtocolHandler.java @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import com.google.common.base.Preconditions; +import com.google.protobuf.Any; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Message; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.subprocess.Subprocess.SubprocessRequestPB; +import org.apache.kudu.subprocess.Subprocess.SubprocessResponsePB; + +/** + * Protocol that represents how to handle a protobuf message. + * + * @param The request protobuf message + * @param The response protobuf message + */ +@InterfaceAudience.Private +public abstract class ProtocolHandler { + + /** + * Unpacks the SubprocessRequestPB message according to the expected request + * type and returns a SubprocessResponsePB builder with the results. + * + * @param request a SubprocessRequestPB message + * @return a SubprocessResponsePB.Builder + * @throws InvalidProtocolBufferException if the protocol message being parsed is invalid + */ + SubprocessResponsePB.Builder unpackAndExecuteRequest(SubprocessRequestPB request) + throws InvalidProtocolBufferException { + Preconditions.checkNotNull(request); + SubprocessResponsePB.Builder builder = SubprocessResponsePB.newBuilder(); + builder.setId(request.getId()); + Class requestType = getRequestClass(); + ResponseT resp = executeRequest(request.getRequest().unpack(requestType)); + builder.setResponse(Any.pack(resp)); + return builder; + } + + /** + * Executes the request and creates a response. + * + * @param request the request message + * @return a response + */ + protected abstract ResponseT executeRequest(RequestT request); + + /** + * Gets the class instance of request message. + * + * @return the request class instance + */ + protected abstract Class getRequestClass(); +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/QueueUtil.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/QueueUtil.java new file mode 100644 index 0000000000..f06efd1adf --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/QueueUtil.java @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
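+
+// Both helpers below block until the queue operation succeeds. An
+// InterruptedException is treated as fatal (a signal to shut down the task)
+// and is rethrown as a KuduSubprocessException rather than retried.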
+ +package org.apache.kudu.subprocess; + +import java.util.concurrent.BlockingQueue; + +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Util class for taking and putting messages to a queue. + */ +@InterfaceAudience.Private +public class QueueUtil { + private static final Logger LOG = LoggerFactory.getLogger(QueueUtil.class); + + static DataT take(BlockingQueue queue) { + // Take an element from the queue. If encountered InterruptedException, + // consider it to be fatal (as a signal to shutdown the task), and + // propagate it up the call stack. + DataT data; + try { + data = queue.take(); + if (LOG.isDebugEnabled()) { + LOG.debug("Message: {} has been taken from the queue", data); + } + } catch (InterruptedException e) { + throw new KuduSubprocessException("Unable to take a message from the queue", e); + } + return data; + } + + static void put(BlockingQueue queue, DataT data) { + // Put the message to the queue. If encountered InterruptedException + // during the put, consider it to be fatal (as a signal to shutdown + // the task), and propagate it up the call stack. + try { + queue.put(data); + if (LOG.isDebugEnabled()) { + LOG.debug("Message: {} has been put on the queue", data); + } + } catch (InterruptedException e) { + throw new KuduSubprocessException("Unable to put the message to the queue", e); + } + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessConfiguration.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessConfiguration.java new file mode 100644 index 0000000000..17d6dfa392 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessConfiguration.java @@ -0,0 +1,202 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.RandomAccessFile; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.cli.BasicParser; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.log4j.BasicConfigurator; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Utility class that manages common configurations to run a subprocess. 
+ */ +@InterfaceAudience.Private +public class SubprocessConfiguration { + private int queueSize; + private static final int QUEUE_SIZE_DEFAULT = 100; + private int maxMsgParserThreads; + private static final int MAX_MSG_PARSER_THREADS_DEFAULT = 3; + private int maxMsgBytes; + private String keytabFile; + private static final String KEYTAB_FILE_DEFAULT = ""; + private String servicePrincipal; + private static final String SERVICE_PRINCIPAL_DEFAULT = ""; + private OutputStream outputStream; + + @VisibleForTesting + static final int MAX_MESSAGE_BYTES_DEFAULT = 8 * 1024 * 1024; + + public SubprocessConfiguration(String[] args) { + parse(args); + } + + /** + * @return the output stream to output messages to. + */ + OutputStream getOutputStream() { + return outputStream; + } + + /** + * @return the size of the message queue, or the default value if not + * provided + */ + int getQueueSize() { + return queueSize; + } + + /** + * @return the maximum number of threads in the message parser thread pool, + * or the default value if not provided + */ + int getMaxMsgParserThreads() { + return maxMsgParserThreads; + } + + /** + * @return the maximum bytes of a message, or the default value if not + * provided + */ + int getMaxMessageBytes() { + return maxMsgBytes; + } + + /** + * @return the path to the service keytab file + */ + public String getKeytabFile() { + return keytabFile; + } + + /** + * @return the principal name of the service to load from the keytab file + */ + public String getServicePrincipal() { + return servicePrincipal; + } + + /** + * Parses the arguments according to the specified options. + * + * @param args the subprocess arguments + * @throws KuduSubprocessException if there are any problems encountered + * while parsing the command line interface. + */ + private void parse(String[] args) throws KuduSubprocessException { + Options options = new Options(); + + final String queueSizeLongOpt = "queueSize"; + Option queueSizeOpt = new Option( + "q", queueSizeLongOpt, /* hasArg= */true, + "Maximum number of messages held by the message queue"); + queueSizeOpt.setRequired(false); + options.addOption(queueSizeOpt); + + final String maxMsgParserThreadsLongOpt = "maxMsgParserThreads"; + Option maxThreadsOpt = new Option( + "p", maxMsgParserThreadsLongOpt, /* hasArg= */true, + "Maximum number of threads in the message parser thread pool for subprocess"); + maxThreadsOpt.setRequired(false); + options.addOption(maxThreadsOpt); + + final String maxMsgBytesLongOpt = "maxMsgBytes"; + Option maxMsgOpt = new Option( + "m", maxMsgBytesLongOpt, /* hasArg= */true, + "Maximum bytes of a message for subprocess"); + maxMsgOpt.setRequired(false); + options.addOption(maxMsgOpt); + + final String keytabFileLongOpt = "keytab"; + Option keytabOpt = new Option( + "k", keytabFileLongOpt, /* hasArg= */true, + "The path to the service keytab file"); + keytabOpt.setRequired(false); + options.addOption(keytabOpt); + + final String principalLongOpt = "principal"; + Option principalOpt = new Option( + "i", principalLongOpt, /* hasArg= */true, + "The service principal name to load from the keytab file"); + principalOpt.setRequired(false); + options.addOption(principalOpt); + + final String outputPipeLongOpt = "outputPipe"; + Option outputPipeOpt = new Option( + "o", outputPipeLongOpt, /* hasArg= */ true, + "The pipe to output messages to. 
If not set, outputs to stdout (this " + + "is generally unsafe and should only be used in tests)"); + outputPipeOpt.setRequired(false); + options.addOption(outputPipeOpt); + + CommandLineParser parser = new BasicParser(); + String outputPipePath; + try { + CommandLine cmd = parser.parse(options, args); + String queueSize = cmd.getOptionValue(queueSizeLongOpt); + this.queueSize = queueSize == null ? + QUEUE_SIZE_DEFAULT : Integer.parseInt(queueSize); + + String maxParserThreads = cmd.getOptionValue(maxMsgParserThreadsLongOpt); + this.maxMsgParserThreads = maxParserThreads == null ? + MAX_MSG_PARSER_THREADS_DEFAULT : Integer.parseInt(maxParserThreads); + + String maxMsgBytes = cmd.getOptionValue(maxMsgBytesLongOpt); + this.maxMsgBytes = maxMsgBytes == null ? + MAX_MESSAGE_BYTES_DEFAULT : Integer.parseInt(maxMsgBytes); + + String keytab = cmd.getOptionValue(keytabFileLongOpt); + this.keytabFile = keytab == null ? + KEYTAB_FILE_DEFAULT : keytab; + + String principal = cmd.getOptionValue(principalLongOpt); + this.servicePrincipal = principal == null ? + SERVICE_PRINCIPAL_DEFAULT : principal; + + outputPipePath = cmd.getOptionValue(outputPipeLongOpt); + } catch (ParseException e) { + throw new KuduSubprocessException("Cannot parse the subprocess command line", e); + } + + try { + if (outputPipePath == null) { + this.outputStream = new SubprocessOutputStream(System.out); + } else { + // If we're not sending messages to System.out, redirect our logs to it. + BasicConfigurator.configure(); + RandomAccessFile outputFile = new RandomAccessFile(new File(outputPipePath), "rw"); + this.outputStream = new FileOutputStream(outputFile.getFD()); + } + } catch (FileNotFoundException e) { + throw new KuduSubprocessException("Output file not found", e); + } catch (IOException e) { + throw new KuduSubprocessException("IO error opening file", e); + } + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessExecutor.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessExecutor.java new file mode 100644 index 0000000000..6d6cccc50d --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessExecutor.java @@ -0,0 +1,193 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
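+
+// Typical wiring, mirroring EchoSubprocessMain: a main() builds a
+// SubprocessConfiguration from argv, picks a ProtocolHandler implementation,
+// and runs the executor with timeoutMs == -1 so the reader, parser, and
+// writer tasks run until the subprocess is shut down:
+//
+//   SubprocessExecutor executor = new SubprocessExecutor();
+//   executor.run(new SubprocessConfiguration(args),
+//       new EchoProtocolHandler(), /* timeoutMs= */ -1);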
+ +package org.apache.kudu.subprocess; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.function.Function; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The {@link SubprocessExecutor} class, + * 1. parses the command line to get the configuration, + * 2. has a single reader thread that continuously reads protobuf-based + * messages from stdin and puts the message onto the inbound request + * queue, + * 3. has multiple parser threads that continuously retrieve the messages + * from the inbound queue, process them, and put the responses onto the + * outbound response queue, + * 4. has a single writer thread that continuously retrieves the responses + * from the outbound queue, and writes the responses to stdout. + */ +@InterfaceAudience.Private +public class SubprocessExecutor { + private static final Logger LOG = LoggerFactory.getLogger(SubprocessExecutor.class); + private final Function errorHandler; + private boolean injectInterrupt = false; + private long blockWriteMs = -1; + private BlockingQueue outboundQueue; + private BlockingQueue inboundQueue; + + public SubprocessExecutor() { + errorHandler = (t) -> { + // If unexpected exception(s) are thrown by any of the tasks, this error + // handler wraps the throwable in a runtime exception and rethrows, + // causing the program to exit with a nonzero status code. + throw new RuntimeException(t); + }; + } + + @VisibleForTesting + public SubprocessExecutor(Function errorHandler) { + this.errorHandler = errorHandler; + } + + /** + * Executes the subprocess with the given arguments and protocol processor. 
+ * + * @param conf the subprocess configuration + * @param protocolHandler the subprocess protocol handler + * @param timeoutMs the maximum time to wait for subprocess tasks to finish, -1 means + * no time out and the tasks will continue execute until it finishes + * @throws ExecutionException if any tasks of the subprocess completed exceptionally + * @throws InterruptedException if the current thread was interrupted while waiting + * @throws TimeoutException if the wait timed out + */ + @VisibleForTesting + public void run(SubprocessConfiguration conf, ProtocolHandler protocolHandler, long timeoutMs) + throws InterruptedException, ExecutionException, TimeoutException { + int maxMsgParserThread = conf.getMaxMsgParserThreads(); + int queueSize = conf.getQueueSize(); + int maxMessageBytes = conf.getMaxMessageBytes(); + + inboundQueue = new ArrayBlockingQueue<>(queueSize, /* fair= */true); + outboundQueue = new ArrayBlockingQueue<>(queueSize, /* fair= */true); + ExecutorService readerService = Executors.newSingleThreadExecutor(); + ExecutorService parserService = Executors.newFixedThreadPool(maxMsgParserThread); + ExecutorService writerService = Executors.newSingleThreadExecutor(); + + // Wrap the system output in a SubprocessOutputStream so IOExceptions + // from system output are propagated up instead of being silently swallowed. + // Note that the BufferedOutputStream is initiated with the maximum bytes of + // a message to ensure the underlying buffer can hold the entire message before + // flushing. + try (BufferedInputStream in = new BufferedInputStream(System.in); + BufferedOutputStream out = new BufferedOutputStream(conf.getOutputStream(), + maxMessageBytes)) { + MessageIO messageIO = new MessageIO(maxMessageBytes, in, out); + + // Start a single reader thread and run the task asynchronously. + MessageReader reader = new MessageReader(inboundQueue, messageIO, injectInterrupt); + CompletableFuture readerFuture = CompletableFuture.runAsync(reader, readerService); + readerFuture.exceptionally(errorHandler); + // Force the program to exit when reader future completes. + // + // TODO(abukor): Refactor code so the futures can be cancelled instead + // of having to call System.exit() + readerFuture = readerFuture.thenRun(() -> System.exit(0)); + + // Start multiple message parser threads and run the tasks asynchronously. + MessageParser parser = new MessageParser(inboundQueue, outboundQueue, protocolHandler); + List> parserFutures = new ArrayList<>(); + for (int i = 0; i < maxMsgParserThread; i++) { + CompletableFuture parserFuture = CompletableFuture.runAsync(parser, parserService); + parserFuture.exceptionally(errorHandler); + parserFutures.add(parserFuture); + } + + // Start a single writer thread and run the task asynchronously. + MessageWriter writer = new MessageWriter(outboundQueue, messageIO, blockWriteMs); + CompletableFuture writerFuture = CompletableFuture.runAsync(writer, writerService); + writerFuture.exceptionally(errorHandler); + + // Wait until the tasks finish execution. A timeout of -1 means the reader, parser, + // and writer tasks should continue until finished. In cases where we don't want + // the tasks to run forever, e.g. in tests, wait for the specified + // timeout. 
+ if (timeoutMs == -1) { + readerFuture.join(); + writerFuture.join(); + CompletableFuture.allOf(parserFutures.toArray(new CompletableFuture[0])).join(); + } else { + readerFuture.get(timeoutMs, TimeUnit.MILLISECONDS); + writerFuture.get(timeoutMs, TimeUnit.MILLISECONDS); + CompletableFuture.allOf(parserFutures.toArray(new CompletableFuture[0])) + .get(timeoutMs, TimeUnit.MILLISECONDS); + } + } catch (IOException e) { + LOG.error("Unable to close the underlying stream", e); + } + } + + /** + * Returns the outbound message queue. + */ + @VisibleForTesting + public BlockingQueue getOutboundQueue() { + return outboundQueue; + } + + /** + * Sets the interruption flag to true. + */ + @VisibleForTesting + public void interrupt() { + injectInterrupt = true; + } + + /** + * Blocks the message write for the given milliseconds. + */ + @VisibleForTesting + public void blockWriteMs(long blockWriteMs) { + this.blockWriteMs = blockWriteMs; + } + + /** + * Wrapper around run() that runs until 'timeoutMs' elapses, + * catches any timeout exceptions, and returns. + * + * Used in tests. + * TODO(awong): it'd be nice if we had a nicer way to shut down the executor. + */ + public void runUntilTimeout(String[] args, ProtocolHandler handler, long timeoutMs) + throws ExecutionException, InterruptedException { + Preconditions.checkArgument(timeoutMs != -1); + try { + run(new SubprocessConfiguration(args), handler, timeoutMs); + } catch (TimeoutException e) { + // no-op + } + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessMetrics.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessMetrics.java new file mode 100644 index 0000000000..6f2e7e437d --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessMetrics.java @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; + +import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; + +/** + * Encapsulates the metrics associated with the subprocess. It is expected that + * this is passed around alongside each request/response as it makes its way + * through the different stages of the SubprocessExecutor, and for callers to + * call startTimer() and the various record methods as appropriate. + */ +public class SubprocessMetrics { + private final Subprocess.SubprocessMetricsPB.Builder builder; + private final Stopwatch stopwatch; + private final BlockingQueue inboundQueue; + + /** + * Construct a SubprocessMetrics object. 
+ * + * @param inboundQueue used to determine the length of the inbound queue + */ + public SubprocessMetrics(BlockingQueue inboundQueue) { + this.inboundQueue = inboundQueue; + builder = Subprocess.SubprocessMetricsPB.newBuilder(); + stopwatch = Stopwatch.createUnstarted(); + } + + public void startTimer() { + stopwatch.start(); + } + + /** + * Stops the stopwatch and records the amount of time elapsed into the + * metrics builder, with the assumption that it was started upon placing an + * element on the inbound queue. + */ + public void recordInboundQueueTimeMs() { + Preconditions.checkArgument(!builder.hasInboundQueueTimeMs()); + Preconditions.checkArgument(stopwatch.isRunning()); + builder.setInboundQueueTimeMs(stopwatch.elapsed(TimeUnit.MILLISECONDS)); + // We'll continue to use the timer as it makes its way through the + // execution lifecycle, so reset it here. + stopwatch.reset(); + } + + /** + * Stops the stopwatch and records the amount of time elapsed into the + * metrics builder, with the assumption that it was started upon beginning + * to execute. + */ + public void recordExecutionTimeMs() { + Preconditions.checkArgument(!builder.hasExecutionTimeMs()); + Preconditions.checkArgument(stopwatch.isRunning()); + builder.setExecutionTimeMs(stopwatch.elapsed(TimeUnit.MILLISECONDS)); + // We'll continue to use the timer as it makes its way through the + // execution lifecycle, so reset it here. + stopwatch.reset(); + } + + /** + * Stops the stopwatch and records the amount of time elapsed into the + * metrics builder, with the assumption that it was started upon placing an + * element on the outbound queue. + */ + public void recordOutboundQueueTimeMs() { + Preconditions.checkArgument(!builder.hasOutboundQueueTimeMs()); + Preconditions.checkArgument(stopwatch.isRunning()); + builder.setOutboundQueueTimeMs(stopwatch.elapsed(TimeUnit.MILLISECONDS)); + stopwatch.stop(); + } + + /** + * Builds the metrics protobuf message with the recorded timings and the + * current lengths of the message queues. + * + * @param outboundQueue used to determine the length of the outbound queue + * @return the constructed SubprocessMetricsPB + */ + public Subprocess.SubprocessMetricsPB buildMetricsPB( + BlockingQueue outboundQueue) { + Preconditions.checkArgument(builder.hasInboundQueueTimeMs()); + Preconditions.checkArgument(builder.hasExecutionTimeMs()); + Preconditions.checkArgument(builder.hasOutboundQueueTimeMs()); + builder.setInboundQueueLength(inboundQueue.size()); + builder.setOutboundQueueLength(outboundQueue.size()); + return builder.build(); + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessOutputStream.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessOutputStream.java new file mode 100644 index 0000000000..5690562064 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/SubprocessOutputStream.java @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Wrapper around {@link java.io.PrintStream} that throws an + * IOException instead of relying on explicit + * checkError calls when writing or flushing to + * the stream. This makes its error-throwing behavior more + * similar to most {@link java.io.OutputStream}. + */ +public class SubprocessOutputStream extends OutputStream { + private final PrintStream out; + + @VisibleForTesting + public static final String WRITE_ERR = "Unable to write to print stream"; + private static final String FLUSH_ERR = "Unable to flush to print stream"; + + public SubprocessOutputStream(PrintStream out) { + this.out = out; + } + + @Override + public void write(int b) throws IOException { + out.write(b); + if (out.checkError()) { + throw new IOException(WRITE_ERR); + } + } + + @Override + public void write(byte[] buf, int off, int len) throws IOException { + out.write(buf, off, len); + if (out.checkError()) { + throw new IOException(WRITE_ERR); + } + } + + @Override + public void write(byte[] b) throws IOException { + out.write(b); + if (out.checkError()) { + throw new IOException(WRITE_ERR); + } + } + + @Override + public void flush() throws IOException { + if (out.checkError()) { + throw new IOException(FLUSH_ERR); + } + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/echo/EchoProtocolHandler.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/echo/EchoProtocolHandler.java new file mode 100644 index 0000000000..53b3c43d17 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/echo/EchoProtocolHandler.java @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess.echo; + +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.subprocess.ProtocolHandler; +import org.apache.kudu.subprocess.Subprocess.EchoRequestPB; +import org.apache.kudu.subprocess.Subprocess.EchoResponsePB; + +/** + * Class that processes a EchoRequest and simply echoes the request + * as a response. 
+ */ +@InterfaceAudience.Private +class EchoProtocolHandler extends ProtocolHandler { + + @Override + protected EchoResponsePB executeRequest(EchoRequestPB request) { + if (request.hasSleepMs()) { + try { + Thread.sleep(request.getSleepMs()); + } catch (Exception e) { + // no-op + } + } + EchoResponsePB.Builder respBuilder = EchoResponsePB.newBuilder(); + respBuilder.setData(request.getData()); + return respBuilder.build(); + } + + @Override + protected Class getRequestClass() { + return EchoRequestPB.class; + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/echo/EchoSubprocessMain.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/echo/EchoSubprocessMain.java new file mode 100644 index 0000000000..f884c3d1ba --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/echo/EchoSubprocessMain.java @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess.echo; + +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.subprocess.SubprocessConfiguration; +import org.apache.kudu.subprocess.SubprocessExecutor; + +@InterfaceAudience.Private +class EchoSubprocessMain { + + public static void main(String[] args) throws Exception { + SubprocessExecutor subprocessExecutor = new SubprocessExecutor(); + EchoProtocolHandler protocolHandler = new EchoProtocolHandler(); + subprocessExecutor.run(new SubprocessConfiguration(args), protocolHandler, /* timeoutMs= */-1); + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/RangerProtocolHandler.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/RangerProtocolHandler.java new file mode 100644 index 0000000000..6d6ef9c572 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/RangerProtocolHandler.java @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess.ranger; + +import org.apache.ranger.plugin.policyengine.RangerAccessResult; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ranger.Ranger; +import org.apache.kudu.ranger.Ranger.RangerRequestListPB; +import org.apache.kudu.ranger.Ranger.RangerResponseListPB; +import org.apache.kudu.ranger.Ranger.RangerResponsePB; +import org.apache.kudu.subprocess.ProtocolHandler; +import org.apache.kudu.subprocess.ranger.authorization.RangerKuduAuthorizer; + +/** + * Class that sends requests to Ranger and gets authorization decision + * (e.g. allow or deny) as a response. + */ +@InterfaceAudience.Private +class RangerProtocolHandler extends ProtocolHandler { + // The Ranger Kudu authorizer plugin. This field is not final + // as it is used in the mock test. + @InterfaceAudience.LimitedPrivate("Test") + static RangerKuduAuthorizer authz = new RangerKuduAuthorizer(); + + RangerProtocolHandler(String servicePrincipal, String keytab) { + authz.init(servicePrincipal, keytab); + } + + @Override + protected RangerResponseListPB executeRequest(RangerRequestListPB requests) { + RangerResponseListPB.Builder responses = authz.authorize(requests); + if (requests.hasControlRequest()) { + if (requests.getControlRequest().getRefreshPolicies()) { + authz.refreshPolicies(); + responses.setControlResponse(Ranger.RangerControlResponsePB.newBuilder() + .setSuccess(true).build()); + } + } + return responses.build(); + } + + @Override + protected Class getRequestClass() { + return RangerRequestListPB.class; + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/RangerSubprocessMain.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/RangerSubprocessMain.java new file mode 100644 index 0000000000..4ce633b0d3 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/RangerSubprocessMain.java @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess.ranger; + +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.kudu.subprocess.SubprocessConfiguration; +import org.apache.kudu.subprocess.SubprocessExecutor; + +// The Ranger subprocess that wraps the Kudu Ranger plugin. For the +// plugin to successfully connect to the Ranger service, configurations +// such as ranger-kudu-security.xml (and ranger-kudu-policymgr-ssl.xml +// for SSL connection) are required. To enable auditing in Ranger, +// ranger-kudu-security.xml is needed. 
The plugin also requires +// core-site.xml to use Hadoop UserGroupInformation for user group +// resolution. +@InterfaceAudience.Private +class RangerSubprocessMain { + + public static void main(String[] args) throws Exception { + SubprocessExecutor subprocessExecutor = new SubprocessExecutor(); + SubprocessConfiguration conf = new SubprocessConfiguration(args); + RangerProtocolHandler protocolHandler = new RangerProtocolHandler(conf.getServicePrincipal(), + conf.getKeytabFile()); + subprocessExecutor.run(conf, protocolHandler, /* timeoutMs= */-1); + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/authorization/RangerKuduAuthorizer.java b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/authorization/RangerKuduAuthorizer.java new file mode 100644 index 0000000000..14477ff57b --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/java/org/apache/kudu/subprocess/ranger/authorization/RangerKuduAuthorizer.java @@ -0,0 +1,248 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess.ranger.authorization; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import javax.annotation.Nullable; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.ranger.plugin.audit.RangerDefaultAuditHandler; +import org.apache.ranger.plugin.policyengine.RangerAccessRequest; +import org.apache.ranger.plugin.policyengine.RangerAccessRequestImpl; +import org.apache.ranger.plugin.policyengine.RangerAccessResourceImpl; +import org.apache.ranger.plugin.policyengine.RangerAccessResult; +import org.apache.ranger.plugin.policyengine.RangerPolicyEngine; +import org.apache.ranger.plugin.service.RangerBasePlugin; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ranger.Ranger; +import org.apache.kudu.ranger.Ranger.RangerRequestListPB; +import org.apache.kudu.ranger.Ranger.RangerRequestPB; +import org.apache.kudu.subprocess.KuduSubprocessException; + +public class RangerKuduAuthorizer { + private static final Logger LOG = LoggerFactory.getLogger(RangerKuduAuthorizer.class); + // The following properties need to match the Kudu service def in Ranger + // (https://github.com/apache/ranger/blob/master/agents-common/src/main/resources/service-defs/ranger-servicedef-kudu.json). 
+ private static final String APP_ID = "kudu"; + private static final String RANGER_DB_RESOURCE_NAME = "database"; + private static final String RANGER_TABLE_RESOURCE_NAME = "table"; + private static final String RANGER_COLUMN_RESOURCE_NAME = "column"; + private static final String SERVICE_TYPE = "kudu"; + + // The Ranger Kudu plugin. This field is not final as it is used in the + // mock test. + @InterfaceAudience.LimitedPrivate("Test") + RangerBasePlugin plugin; + + public RangerKuduAuthorizer() { + plugin = new RangerBasePlugin(SERVICE_TYPE, APP_ID); + plugin.setResultProcessor(new RangerDefaultAuditHandler()); + } + + /** + * Initializes the Ranger Kudu plugin, which has to be called explicitly + * before doing any authorizations. + * + * @param servicePrincipal the principal name for Kudu to load from the keytab file + * @param keytab the path to the Kudu keytab file + */ + public void init(String servicePrincipal, String keytab) { + // Determine if Kerberos is enabled in the Hadoop configuration. Kerberos should + // also be enabled in the Kudu master. + if (UserGroupInformation.isSecurityEnabled()) { + if (servicePrincipal.isEmpty() || keytab.isEmpty()) { + throw new KuduSubprocessException("Kudu principal and Keytab file must be " + + "provided when Kerberos is enabled in Ranger"); + } + // When Kerberos is enabled, login with the Kudu principal and keytab + // before initializing the Ranger plugin. + try { + LOG.debug("Login with Kudu principal: {}, and keytab: {}", servicePrincipal, keytab); + UserGroupInformation.loginUserFromKeytab(servicePrincipal, keytab); + } catch (IOException e) { + throw new KuduSubprocessException("Failed to login with Kudu principal/keytab", e); + } + } + plugin.init(); + LOG.info("Finished Ranger Kudu plugin initialization"); + } + + /** + * Authorizes a given RangerRequestListPB in Ranger and returns + * a list of RangerAccessResult which contains the authorization + * decisions. Note that the order of results is determined by the order of + * requests. + * + * @param requests a RangerRequestListPB + * @return a list of RangerAccessResult + */ + @VisibleForTesting + public Ranger.RangerResponseListPB.Builder authorize(RangerRequestListPB requests) { + if (!requests.hasUser() || requests.getUser().isEmpty()) { + Ranger.RangerResponseListPB.Builder rangerResponseListPB = Ranger.RangerResponseListPB + .newBuilder(); + List requestsList = requests.getRequestsList(); + for (int i = 0, requestsListSize = requestsList.size(); i < requestsListSize; i++) { + Ranger.RangerResponsePB response = Ranger.RangerResponsePB.newBuilder() + .setAllowed(false) + .build(); + rangerResponseListPB.addResponses(response); + } + return rangerResponseListPB; + } + return authorizeRequests(requests); + } + + /** + * Refreshes the policies cached in the authorization provider on a best-effort basis. It + * doesn't guarantee invalidating the cache or that the latest policies could be pulled from + * the server and doesn't throw exceptions even if the server couldn't be reached. + * + * TODO(abukor): Revisit if RANGER-2906 is fixed. + */ + public void refreshPolicies() { + LOG.debug("Refreshing policies..."); + plugin.refreshPoliciesAndTags(); + LOG.debug("Refreshing policies... DONE"); + } + + /** + * Creates a Ranger access request for the specified user who performs + * the given action on the resource. + * + * @param action action to be authorized on the resource. 
Note that when it + * is null, Ranger will match to any valid actions + * @param user user who is performing the action + * @param groups the set of groups the user belongs to + * @param db the database name the action is to be performed on + * @param table the table name the action is to be performed on + * @param col the column name the action is to be performed on + * @return the ranger access request + */ + private static RangerAccessRequestImpl createRequest( + @Nullable String action, String user, + @Nullable Set groups, @Nullable String db, + @Nullable String table, @Nullable String col) { + final RangerAccessResourceImpl resource = new RangerAccessResourceImpl(); + resource.setValue(RANGER_DB_RESOURCE_NAME, db); + resource.setValue(RANGER_TABLE_RESOURCE_NAME, table); + resource.setValue(RANGER_COLUMN_RESOURCE_NAME, col); + + final RangerAccessRequestImpl request = new RangerAccessRequestImpl(); + request.setResource(resource); + request.setAccessType(action); + // Add action as it is used for auditing in Ranger. + request.setAction(action); + request.setUser(user); + request.setUserGroups(groups); + return request; + } + + /** + * Creates a RangerResponseListPB for the given + * RangerRequestListPB. + * + * @param requests the given RangerRequestListPB + * @return a list of RangerAccessRequest + */ + private Ranger.RangerResponseListPB.Builder authorizeRequests(RangerRequestListPB requests) { + Ranger.RangerResponseListPB.Builder rangerResponseList = Ranger.RangerResponseListPB + .newBuilder(); + Preconditions.checkArgument(requests.hasUser()); + Preconditions.checkArgument(!requests.getUser().isEmpty()); + final String user = requests.getUser(); + Set groups = getUserGroups(user); + for (RangerRequestPB request : requests.getRequestsList()) { + // Action should be lower case to match the Kudu service def in Ranger. + String action = request.getAction().name().toLowerCase(Locale.ENGLISH); + String db = request.hasDatabase() ? request.getDatabase() : null; + String table = request.hasTable() ? request.getTable() : null; + String column = request.hasColumn() ? 
request.getColumn() : null; + boolean requiresAdmin = request.hasRequiresDelegateAdmin() && + request.getRequiresDelegateAdmin(); + boolean isOwner = request.hasIsOwner() && request.getIsOwner(); + RangerAccessRequest rangerAccessRequest = createRequest(action, user, groups, db, table, + column); + RangerAccessResult rangerAccessResult = plugin.isAccessAllowed(rangerAccessRequest); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("RangerAccessRequest [%s] receives result [%s]", + rangerAccessResult.getAccessRequest().toString(), rangerAccessResult.toString())); + } + if (!rangerAccessResult.getIsAllowed() && isOwner) { + rangerAccessRequest = createRequest(action, RangerPolicyEngine.RESOURCE_OWNER, groups, + db, table, column); + rangerAccessResult = plugin.isAccessAllowed(rangerAccessRequest); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("RangerAccessRequest [%s] receives result [%s]", + rangerAccessResult.getAccessRequest().toString(), rangerAccessResult.toString())); + } + } + if (rangerAccessResult.getIsAllowed() && requiresAdmin) { + rangerAccessRequest = createRequest(RangerPolicyEngine.ADMIN_ACCESS, user, groups, db, + table, column); + rangerAccessResult = plugin.isAccessAllowed(rangerAccessRequest); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("RangerAccessRequest [%s] receives result [%s]", + rangerAccessResult.getAccessRequest().toString(), rangerAccessResult.toString())); + } + if (!rangerAccessResult.getIsAllowed() && isOwner) { + rangerAccessRequest = createRequest(RangerPolicyEngine.ADMIN_ACCESS, + RangerPolicyEngine.RESOURCE_OWNER, + groups, db, table, column); + rangerAccessResult = plugin.isAccessAllowed(rangerAccessRequest); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("RangerAccessRequest [%s] receives result [%s]", + rangerAccessResult.getAccessRequest().toString(), rangerAccessResult.toString())); + } + } + } + + Ranger.RangerResponsePB rangerResponsePB = Ranger.RangerResponsePB.newBuilder() + .setAllowed(rangerAccessResult.getIsAllowed()) + .build(); + + rangerResponseList.addResponses(rangerResponsePB); + } + return rangerResponseList; + } + + /** + * Gets the user group mapping from Hadoop. The groups of a user is determined by a + * group mapping service provider. See more detail at + * https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/GroupsMapping.html. + * + * @param user the user name + * @return the set of groups the user belongs to + */ + private static Set getUserGroups(String user) { + Preconditions.checkNotNull(user); + Preconditions.checkArgument(!user.isEmpty()); + UserGroupInformation ugi; + ugi = UserGroupInformation.createRemoteUser(user); + return new HashSet<>(ugi.getGroups()); + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/main/resources/log4j2.properties b/java-scala-spark4/kudu-subprocess/src/main/resources/log4j2.properties new file mode 100644 index 0000000000..b3459ada41 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/main/resources/log4j2.properties @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT diff --git a/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/SubprocessTestUtil.java b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/SubprocessTestUtil.java new file mode 100644 index 0000000000..f08a8a4cf3 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/SubprocessTestUtil.java @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PipedInputStream; +import java.io.PipedOutputStream; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.util.function.Function; + +import com.google.protobuf.Any; +import com.google.protobuf.Message; +import com.google.protobuf.Parser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.subprocess.Subprocess.EchoRequestPB; +import org.apache.kudu.subprocess.Subprocess.SubprocessRequestPB; + +/** + * Utility class of common functions used for testing subprocess. + */ +public class SubprocessTestUtil { + private static final Logger LOG = LoggerFactory.getLogger(SubprocessTestUtil.class); + protected static final String[] NO_ARGS = {""}; + protected static final int TIMEOUT_MS = 1000; + + // Helper functors that can be passed around to ensure we either see an error + // or not. 
+ protected static final Function NO_ERR = e -> { + LOG.error(String.format("Unexpected error: %s", e.getMessage())); + fail(); + return null; + }; + protected static final Function HAS_ERR = e -> { + assertTrue(e instanceof KuduSubprocessException); + return null; + }; + + // Pipe that we can write to that will feed requests to the subprocess's + // input pipe. + protected PipedOutputStream requestSenderPipe; + + // Pipe that we can read from that will receive responses from the + // subprocess's output pipe. We'll read from it via BufferedInputStream, + // so wrap the pipe here. + protected final PipedInputStream responseReceiverPipe = new PipedInputStream(); + private final BufferedInputStream bufferedInputStream = + new BufferedInputStream(responseReceiverPipe); + + public static class PrintStreamWithIOException extends PrintStream { + public PrintStreamWithIOException(OutputStream out, boolean autoFlush, String encoding) + throws UnsupportedEncodingException { + super(out, autoFlush, encoding); + } + + @Override + public boolean checkError() { + // Always say that we've got an error. + return true; + } + } + + // Sends a SubprocessRequestPB to the sender pipe, serializing it as + // appropriate. + public void sendRequestToPipe(Subprocess.SubprocessRequestPB req) throws IOException { + requestSenderPipe.write(SubprocessTestUtil.serializeMessage(req)); + } + + // Receives a response from the receiver pipe and deserializes it into a + // SubprocessResponsePB. + public Subprocess.SubprocessResponsePB receiveResponse() throws IOException { + return SubprocessTestUtil.deserializeMessage(bufferedInputStream, + Subprocess.SubprocessResponsePB.parser()); + } + + // Sets up and returns a SubprocessExecutor with the given error handler and + // IO error injection behavior. The SubprocessExecutor will do IO to and from + // 'requestSenderPipe' and 'responseReceiverPipe'. + public SubprocessExecutor setUpExecutorIO(Function errorHandler, + boolean injectIOError) throws IOException { + // Initialize the pipe that we'll push requests to; feed it into the + // executor's input pipe. + PipedInputStream inputPipe = new PipedInputStream(); + requestSenderPipe = new PipedOutputStream(inputPipe); + System.setIn(inputPipe); + + // Initialize the pipe that the executor will write to; feed it into the + // response pipe that we can read from. + PipedOutputStream outputPipe = new PipedOutputStream(responseReceiverPipe); + if (injectIOError) { + System.setOut(new PrintStreamWithIOException(outputPipe, /*autoFlush*/false, "UTF-8")); + } else { + System.setOut(new PrintStream(outputPipe, /*autoFlush*/false, "UTF-8")); + } + return new SubprocessExecutor(errorHandler); + } + + /** + * Constructs a SubprocessRequestPB message of echo request with the + * given payload and sleep. + * + * @param payload the message payload + * @param sleepMs the amount of time to sleep + * @return a SubprocessRequestPB message + */ + public static SubprocessRequestPB createEchoSubprocessRequest(String payload, + int sleepMs) { + SubprocessRequestPB.Builder builder = SubprocessRequestPB.newBuilder(); + EchoRequestPB.Builder echoBuilder = EchoRequestPB.newBuilder(); + echoBuilder.setData(payload); + if (sleepMs > 0) { + echoBuilder.setSleepMs(sleepMs); + } + builder.setRequest(Any.pack(echoBuilder.build())); + return builder.build(); + } + + /** + * Constructs a SubprocessRequestPB message of echo request with the + * given payload. 
+   *
+   * @param payload the message payload
+   * @return a SubprocessRequestPB message
+   */
+  public static SubprocessRequestPB createEchoSubprocessRequest(String payload) {
+    return createEchoSubprocessRequest(payload, 0);
+  }
+
+  /**
+   * Serializes the given message to a byte array.
+   *
+   * @param message the message
+   * @return a serialized message in byte array
+   * @throws IOException if an I/O error occurs
+   */
+  public static byte[] serializeMessage(Message message) throws IOException {
+    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
+    MessageIO messageIO = new MessageIO(
+        SubprocessConfiguration.MAX_MESSAGE_BYTES_DEFAULT,
+        /* in= */null, new BufferedOutputStream(byteOutputStream));
+    messageIO.writeMessage(message);
+    return byteOutputStream.toByteArray();
+  }
+
+  /**
+   * Deserializes a message from the byte array.
+   *
+   * @param bytes the serialized message in byte array
+   * @param parser the parser for the message
+   * @return a message
+   * @throws IOException if an I/O error occurs
+   */
+  public static <T extends Message> T deserializeMessage(byte[] bytes, Parser<T> parser)
+      throws IOException {
+    ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes);
+    return deserializeMessage(new BufferedInputStream(inputStream), parser);
+  }
+
+  /**
+   * Deserializes a message from the input stream.
+   *
+   * @param inputStream the input stream from which to deserialize the message
+   * @param parser the parser for the message
+   * @return a message
+   * @throws IOException if an I/O error occurs
+   */
+  public static <T extends Message> T deserializeMessage(BufferedInputStream inputStream,
+                                                         Parser<T> parser) throws IOException {
+    MessageIO messageIO = new MessageIO(
+        SubprocessConfiguration.MAX_MESSAGE_BYTES_DEFAULT, inputStream, /* out= */null);
+    byte[] data = messageIO.readBytes();
+    return parser.parseFrom(data);
+  }
+}
diff --git a/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/TestMessageIO.java b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/TestMessageIO.java
new file mode 100644
index 0000000000..2c55cb88de
--- /dev/null
+++ b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/TestMessageIO.java
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
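NOTE: The TestMessageIO cases that follow build wire messages by hand as a size prefix
followed by a message body (see the uses of MessageIO.intToBytes in the tests). Below is a
rough, self-contained sketch of that framing, assuming the 4-byte big-endian prefix implied
by those tests; FramingSketch is a hypothetical name and is not part of this patch.

[source,java]
----
import java.nio.ByteBuffer;

// Minimal sketch of a length-prefixed frame: a 4-byte big-endian size
// followed by the serialized message body. It mirrors what the tests
// below assemble with MessageIO.intToBytes(size) + body, but does not
// use any Kudu classes.
final class FramingSketch {
  static byte[] frame(byte[] body) {
    ByteBuffer buf = ByteBuffer.allocate(4 + body.length);
    buf.putInt(body.length); // ByteBuffer is big-endian by default
    buf.put(body);
    return buf.array();
  }

  static byte[] unframe(byte[] framed) {
    ByteBuffer buf = ByteBuffer.wrap(framed);
    int size = buf.getInt();
    byte[] body = new byte[size];
    buf.get(body);
    return body;
  }
}
----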
+ +package org.apache.kudu.subprocess; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.Arrays; + +import com.google.common.primitives.Bytes; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.function.ThrowingRunnable; + +import org.apache.kudu.subprocess.Subprocess.SubprocessRequestPB; +import org.apache.kudu.test.junit.RetryRule; + +/** + * Tests for reading and writing protobuf message. + */ +public class TestMessageIO { + + @Rule + public RetryRule retryRule = new RetryRule(); + + public static class PrintStreamOverload extends PrintStream { + public PrintStreamOverload(OutputStream out) { + super(out); + } + + /** + * Expands the visibility of setError() for the tests to call it. + */ + @Override + public void setError() { + super.setError(); + } + } + + /** + * Serializes a subprocess message that wraps EchoRequestPB and de-serializes + * it to verify the content. + */ + @Test + public void testBasicEchoMessage() throws Exception { + final String data = "data"; + final SubprocessRequestPB request = SubprocessTestUtil.createEchoSubprocessRequest(data); + final byte[] message = SubprocessTestUtil.serializeMessage(request); + final SubprocessRequestPB actualRequest = SubprocessTestUtil.deserializeMessage( + message, SubprocessRequestPB.parser()); + Assert.assertEquals(request, actualRequest); + } + + /** + * Verifies that writing messages via SubprocessOutputStream can + * catch errors thrown from underlying PrintStream and re-throws + * IOException. + */ + @Test + public void testSubprocessOutputStream() { + final String data = "data"; + final SubprocessRequestPB request = SubprocessTestUtil.createEchoSubprocessRequest(data); + final PrintStreamOverload printStreamOverload = + new PrintStreamOverload(new ByteArrayOutputStream()); + final BufferedOutputStream out = new BufferedOutputStream( + new SubprocessOutputStream(printStreamOverload)); + final MessageIO messageIO = new MessageIO( + SubprocessConfiguration.MAX_MESSAGE_BYTES_DEFAULT, /* in= */null, out); + Throwable thrown = Assert.assertThrows(IOException.class, new ThrowingRunnable() { + @Override + public void run() throws Exception { + printStreamOverload.setError(); + messageIO.writeMessage(request); + } + }); + Assert.assertTrue(thrown.getMessage().contains(SubprocessOutputStream.WRITE_ERR)); + } + + /** + * Verifies that reading malformed messages that has mismatched size + * and body (not enough data in the body) should cause expected error. + */ + @Test + public void testMalformedMessageMismatchSize() { + byte[] size = MessageIO.intToBytes(100); + byte[] body = new byte[10]; + Arrays.fill(body, (byte)0); + byte[] malformedMessage = Bytes.concat(size, body); + BufferedInputStream in = new BufferedInputStream(new ByteArrayInputStream(malformedMessage)); + MessageIO messageIO = new MessageIO(SubprocessConfiguration.MAX_MESSAGE_BYTES_DEFAULT, + in, /* out= */null); + Throwable thrown = Assert.assertThrows(IOException.class, new ThrowingRunnable() { + @Override + public void run() throws Exception { + messageIO.readBytes(); + } + }); + Assert.assertTrue(thrown.getMessage().contains("unable to receive message")); + } + + /** + * Verify that KuduSubprocessException is thrown by MessageIO.readBytes() when + * an oversized message is detected in the input stream. 
After the oversized + * message is read and discarded, next message can be read from the stream. + */ + @Test + public void testOversizedMessage() throws Exception { + final int maxMessageSize = 32; + byte[] size0 = MessageIO.intToBytes(maxMessageSize + 1); + byte[] body0 = new byte[maxMessageSize + 1]; + Arrays.fill(body0, (byte) 0); + byte[] msg0 = Bytes.concat(size0, body0); + + byte[] size1 = MessageIO.intToBytes(maxMessageSize); + byte[] body1 = new byte[maxMessageSize]; + Arrays.fill(body1, (byte) 1); + byte[] msg1 = Bytes.concat(size1, body1); + + byte[] msg = Bytes.concat(msg0, msg1); + + BufferedInputStream in = new BufferedInputStream(new ByteArrayInputStream(msg)); + + MessageIO messageIO = new MessageIO(maxMessageSize, in, /* out= */null); + Throwable thrown = Assert.assertThrows(KuduSubprocessException.class, new ThrowingRunnable() { + @Override + public void run() throws Exception { + messageIO.readBytes(); + } + }); + Assert.assertTrue(thrown.getMessage().contains( + "message size (33) exceeds maximum message size (32): message is discarded")); + + byte[] readMsg = messageIO.readBytes(); + Assert.assertEquals(maxMessageSize, readMsg.length); + Assert.assertArrayEquals(body1, readMsg); + } +} diff --git a/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/echo/TestEchoSubprocess.java b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/echo/TestEchoSubprocess.java new file mode 100644 index 0000000000..7d555fd24b --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/echo/TestEchoSubprocess.java @@ -0,0 +1,296 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess.echo; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.function.ThrowingRunnable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.subprocess.MessageIO; +import org.apache.kudu.subprocess.OutboundResponse; +import org.apache.kudu.subprocess.Subprocess.EchoResponsePB; +import org.apache.kudu.subprocess.Subprocess.SubprocessMetricsPB; +import org.apache.kudu.subprocess.Subprocess.SubprocessResponsePB; +import org.apache.kudu.subprocess.SubprocessConfiguration; +import org.apache.kudu.subprocess.SubprocessExecutor; +import org.apache.kudu.subprocess.SubprocessTestUtil; +import org.apache.kudu.test.junit.RetryRule; + +/** + * Tests for subprocess that handles EchoRequest messages in various conditions. 
+ */ +public class TestEchoSubprocess extends SubprocessTestUtil { + private static final Logger LOG = LoggerFactory.getLogger(TestEchoSubprocess.class); + private static final String MESSAGE = "We are one. We are many."; + + @Rule + public RetryRule retryRule = new RetryRule(); + + // Given that executors run multiple threads, the exceptions that we expect + // may not necessarily be the first thrown. This checks for the expected + // error on all thrown exceptions, including suppressed ones. + void assertIncludingSuppressedThrows(Class expectedThrowable, + String errorMessage, + ThrowingRunnable runnable) { + try { + runnable.run(); + } catch (Throwable actualThrown) { + if (expectedThrowable.isInstance(actualThrown) && + actualThrown.toString().contains(errorMessage)) { + return; + } + LOG.info(actualThrown.toString()); + for (Throwable s : actualThrown.getSuppressed()) { + if (s.getClass() == expectedThrowable && s.toString().contains(errorMessage)) { + return; + } + LOG.info(s.toString()); + } + throw new AssertionError(String.format("No errors that match %s with message: %s", + expectedThrowable.toString(), errorMessage)); + } + throw new AssertionError("Didn't throw an exception"); + } + + /** + * Test a regular old message. There should be no exceptions of any kind. + * We should also see some metrics that make sense. + */ + @Test + public void testBasicMsg() throws Exception { + SubprocessExecutor executor = + setUpExecutorIO(NO_ERR, /*injectIOError*/false); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE)); + + executor.runUntilTimeout(NO_ARGS, new EchoProtocolHandler(), TIMEOUT_MS); + SubprocessResponsePB spResp = receiveResponse(); + EchoResponsePB echoResp = spResp.getResponse().unpack(EchoResponsePB.class); + Assert.assertEquals(MESSAGE, echoResp.getData()); + + SubprocessMetricsPB spMetrics = spResp.getMetrics(); + // We only sent one request, so by the time the executor sent the message, + // the queues should have been empty. + Assert.assertTrue(spMetrics.hasInboundQueueLength()); + Assert.assertTrue(spMetrics.hasOutboundQueueLength()); + Assert.assertEquals(0, spMetrics.getInboundQueueLength()); + Assert.assertEquals(0, spMetrics.getOutboundQueueLength()); + + // The recorded times should be non-zero. + Assert.assertTrue(spMetrics.hasInboundQueueTimeMs()); + Assert.assertTrue(spMetrics.hasOutboundQueueTimeMs()); + Assert.assertTrue(spMetrics.hasExecutionTimeMs()); + Assert.assertTrue(spMetrics.getInboundQueueTimeMs() >= 0); + Assert.assertTrue(spMetrics.getOutboundQueueTimeMs() >= 0); + Assert.assertTrue(spMetrics.getExecutionTimeMs() >= 0); + } + + /** + * Test to see what happens when the execution is the bottleneck. We should + * see it in the execution time and inbound queue time and length metrics. + */ + @Test + public void testSlowExecutionMetrics() throws Exception { + final int SLEEP_MS = 200; + // Suppress checkstyle VariableDeclarationUsageDistance warning. + // CHECKSTYLE:OFF + SubprocessExecutor executor = setUpExecutorIO(NO_ERR, /*injectIOError*/false); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE, SLEEP_MS)); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE, SLEEP_MS)); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE, SLEEP_MS)); + + // Run the executor with a single parser thread so we can make stronger + // assumptions about timing. 
+ executor.runUntilTimeout(new String[]{"-p", "1"}, new EchoProtocolHandler(), TIMEOUT_MS); + // CHECKSTYLE:ON + + SubprocessMetricsPB m = receiveResponse().getMetrics(); + long inboundQueueLength = m.getInboundQueueLength(); + long inboundQueueTimeMs = m.getInboundQueueTimeMs(); + long executionTimeMs = m.getExecutionTimeMs(); + // By the time the first request is written, the second should be sleeping, + // and the third should be waiting in the inbound queue. That said, the + // second could also be in the queue if the parser thread is slow to pick + // up the second request. + Assert.assertTrue( + String.format("Got an unexpected inbound queue length: %s", inboundQueueLength), + inboundQueueLength == 1 || inboundQueueLength == 2); + Assert.assertEquals(0, m.getOutboundQueueLength()); + + // We can't make many guarantees about how long the first request was + // waiting in the queues. + Assert.assertTrue( + String.format("Expected a positive inbound queue time: %s", inboundQueueTimeMs), + inboundQueueTimeMs >= 0); + + // It should've taken longer than our sleep to execute. + Assert.assertTrue( + String.format("Expected a longer execution time than %s ms: %s ms", + SLEEP_MS, executionTimeMs), + executionTimeMs >= SLEEP_MS); + + // The second request should've spent the duration of the first sleep waiting + // in the inbound queue. + m = receiveResponse().getMetrics(); + Assert.assertTrue( + String.format("Expected a higher inbound queue time: %s ms", m.getInboundQueueTimeMs()), + m.getInboundQueueTimeMs() >= SLEEP_MS); + + // The last should've spent the duration of the first two sleeps waiting. + m = receiveResponse().getMetrics(); + Assert.assertTrue( + String.format("Expected a higher inbound queue time: %s", m.getInboundQueueTimeMs()), + m.getInboundQueueTimeMs() >= 2 * SLEEP_MS); + } + + /** + * Test to see what happens when writing is the bottleneck. We should see it + * in the outbound queue metrics. + */ + @Test + public void testSlowWriterMetrics() throws Exception { + SubprocessExecutor executor = + setUpExecutorIO(NO_ERR, /*injectIOError*/false); + final int BLOCK_MS = 200; + executor.blockWriteMs(BLOCK_MS); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE)); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE)); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE)); + executor.runUntilTimeout(NO_ARGS, new EchoProtocolHandler(), TIMEOUT_MS); + + // In writing the first request, the other two requests should've been + // close behind, likely both in the outbound queue. + SubprocessMetricsPB m = receiveResponse().getMetrics(); + Assert.assertEquals(2, m.getOutboundQueueLength()); + + // NOTE: timing on the exact slept time sometimes yields a small error, so + // leave some buffer in checking for correctness. + final int BUFFER_MS = 50; + m = receiveResponse().getMetrics(); + Assert.assertEquals(1, m.getOutboundQueueLength()); + Assert.assertTrue( + String.format("Expected a higher outbound queue time: %s ms", m.getOutboundQueueTimeMs()), + m.getOutboundQueueTimeMs() + BUFFER_MS >= BLOCK_MS); + + m = receiveResponse().getMetrics(); + Assert.assertEquals(0, m.getOutboundQueueLength()); + Assert.assertTrue( + String.format("Expected a higher outbound queue time: %s ms", m.getOutboundQueueTimeMs()), + m.getOutboundQueueTimeMs() + BUFFER_MS >= 2 * BLOCK_MS); + } + + /** + * Test what happens when we send a message that is completely empty (i.e. + * not an empty SubprocessRequestPB message -- no message at all). 
+ */ + @Test + public void testMsgWithEmptyMessage() throws Exception { + SubprocessExecutor executor = setUpExecutorIO(NO_ERR, + /*injectIOError*/false); + requestSenderPipe.write(MessageIO.intToBytes(0)); + // NOTE: reading IO when the pipe is virtually empty leads us to hang. So + // let's put something else onto the pipe and just ensure that our empty + // message was a no-op. + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE)); + executor.runUntilTimeout(NO_ARGS, new EchoProtocolHandler(), TIMEOUT_MS); + + SubprocessResponsePB spResp = receiveResponse(); + EchoResponsePB echoResp = spResp.getResponse().unpack(EchoResponsePB.class); + Assert.assertEquals(MESSAGE, echoResp.getData()); + } + + /** + * Test what happens when we send a message that isn't protobuf. + */ + @Test + public void testMalformedPB() throws Exception { + SubprocessExecutor executor = setUpExecutorIO(NO_ERR, /*injectIOError*/false); + requestSenderPipe.write("malformed".getBytes(StandardCharsets.UTF_8)); + // We need to close the pipe for the read() in InputStream.java to not block + requestSenderPipe.close(); + Throwable thrown = Assert.assertThrows(ExecutionException.class, + () -> executor.run(new SubprocessConfiguration(NO_ARGS), + new EchoProtocolHandler(), TIMEOUT_MS)); + Assert.assertTrue(thrown.getMessage().contains("Unable to read the protobuf message")); + } + + /** + * Try injecting an IOException to the pipe that gets written to + * by the SubprocessExecutor. We should exit with a + * KuduSubprocessException + */ + @Test + public void testInjectIOException() throws Exception { + SubprocessExecutor executor = + setUpExecutorIO(HAS_ERR, /*injectIOError*/true); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE)); + // NOTE: we don't expect the ExecutionException from the MessageWriter's + // CompletableFuture because, in waiting for completion, the MessageReader + // times out before CompletableFuture.get() is called on the writer. + assertIncludingSuppressedThrows(IOException.class, + "Unable to write to print stream", + () -> executor.run(new SubprocessConfiguration(NO_ARGS), + new EchoProtocolHandler(), TIMEOUT_MS)); + } + + /** + * Parses message with InterruptedException injected should exit + * with KuduSubprocessException. + */ + @Test + public void testInjectInterruptedException() throws Exception { + SubprocessExecutor executor = + setUpExecutorIO(HAS_ERR, /*injectIOError*/false); + executor.interrupt(); + sendRequestToPipe(createEchoSubprocessRequest(MESSAGE)); + assertIncludingSuppressedThrows(ExecutionException.class, + "Unable to put the message to the queue", + () -> executor.run(new SubprocessConfiguration(NO_ARGS), + new EchoProtocolHandler(), TIMEOUT_MS)); + } + + /** + * Check that even if the writer is blocked writing, the + * MessageParser tasks can continue making progress. + */ + @Test + public void testSlowWriterDoesntBlockQueues() throws Exception { + SubprocessExecutor executor = + setUpExecutorIO(NO_ERR, /*injectIOError*/false); + sendRequestToPipe(createEchoSubprocessRequest("a")); + sendRequestToPipe(createEchoSubprocessRequest("b")); + executor.blockWriteMs(2 * TIMEOUT_MS); + Assert.assertThrows(TimeoutException.class, + () -> executor.run(new SubprocessConfiguration(NO_ARGS), + new EchoProtocolHandler(), TIMEOUT_MS)); + + // The MessageWriter took the first message from the outbound queue and + // went to sleep for 2 * TIMEOUT_MS; the second message should still be in + // the queue after TIMEOUT_MS. 
+    BlockingQueue<OutboundResponse> outboundQueue = executor.getOutboundQueue();
+    Assert.assertEquals(1, outboundQueue.size());
+  }
+}
diff --git a/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/ranger/TestRangerSubprocess.java b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/ranger/TestRangerSubprocess.java
new file mode 100644
index 0000000000..3e64059116
--- /dev/null
+++ b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/ranger/TestRangerSubprocess.java
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.kudu.subprocess.ranger;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeoutException;
+
+import com.google.protobuf.Any;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import org.apache.kudu.ranger.Ranger;
+import org.apache.kudu.ranger.Ranger.ActionPB;
+import org.apache.kudu.ranger.Ranger.RangerRequestListPB;
+import org.apache.kudu.ranger.Ranger.RangerRequestPB;
+import org.apache.kudu.ranger.Ranger.RangerResponseListPB;
+import org.apache.kudu.subprocess.Subprocess.SubprocessRequestPB;
+import org.apache.kudu.subprocess.SubprocessConfiguration;
+import org.apache.kudu.subprocess.SubprocessExecutor;
+import org.apache.kudu.subprocess.SubprocessTestUtil;
+import org.apache.kudu.subprocess.ranger.authorization.RangerKuduAuthorizer;
+import org.apache.kudu.test.junit.RetryRule;
+
+/**
+ * Tests for the Ranger subprocess.
+ */ +public class TestRangerSubprocess extends SubprocessTestUtil { + + @Rule + public RetryRule retryRule = new RetryRule(); + + private static RangerRequestPB createRangerRequest(ActionPB action, String db, + String table, String col) { + RangerRequestPB.Builder builder = RangerRequestPB.newBuilder(); + builder.setAction(action); + builder.setDatabase(db); + builder.setTable(table); + builder.setColumn(col); + return builder.build(); + } + + private static RangerRequestListPB createRangerRequestList( + List requests, String user) { + RangerRequestListPB.Builder builder = RangerRequestListPB.newBuilder(); + builder.addAllRequests(requests); + builder.setUser(user); + return builder.build(); + } + + private static SubprocessRequestPB createRangerSubprocessRequest( + RangerRequestListPB request) { + SubprocessRequestPB.Builder builder = SubprocessRequestPB.newBuilder(); + builder.setRequest(Any.pack(request)); + return builder.build(); + } + + @Before + public void mockAuthorizer() { + RangerProtocolHandler.authz = Mockito.mock(RangerKuduAuthorizer.class); + } + + /** + * Sends a list of Ranger request and verifies the response by mocking the authorization + * decisions. + */ + @Test + public void testBasicRangerMessage() throws Exception { + final String user = "Alice"; + final String db = "db"; + final String table = "table"; + final String col = "col"; + final RangerRequestPB updateRequest = createRangerRequest(ActionPB.UPDATE, db, table, col); + final RangerRequestPB selectRequest = createRangerRequest(ActionPB.SELECT, db, table, col); + final RangerRequestPB createRequest = createRangerRequest(ActionPB.CREATE, db, table, col); + final List requests = new ArrayList<>(); + // Send multiple ranger requests in one message. + requests.add(updateRequest); + requests.add(selectRequest); + requests.add(createRequest); + final RangerRequestListPB requestList = createRangerRequestList(requests, user); + final SubprocessRequestPB subprocessRequest = createRangerSubprocessRequest(requestList); + + // Mock the authorization results. + RangerResponseListPB.Builder responseListPB = RangerResponseListPB.newBuilder() + .addResponses(Ranger.RangerResponsePB.newBuilder().setAllowed(true).build()) + .addResponses(Ranger.RangerResponsePB.newBuilder().setAllowed(false).build()) + .addResponses(Ranger.RangerResponsePB.newBuilder().setAllowed(true).build()); + Mockito.when(RangerProtocolHandler.authz.authorize(requestList)) + .thenReturn(responseListPB); + + SubprocessExecutor executor = + setUpExecutorIO(NO_ERR, /*injectIOError*/false); + sendRequestToPipe(subprocessRequest); + // We expect the executor to time out since it is non cancelable + // if no exception encountered. 
+ assertThrows(TimeoutException.class, + () -> executor.run(new SubprocessConfiguration(NO_ARGS), + new RangerProtocolHandler(/* servicePrincipal= */"", /* keytab= */""), + TIMEOUT_MS)); + + RangerResponseListPB resp = receiveResponse().getResponse().unpack(RangerResponseListPB.class); + assertTrue(resp.getResponses(/* index= */0).getAllowed()); + assertFalse(resp.getResponses(/* index= */1).getAllowed()); + assertTrue(resp.getResponses(/* index= */2).getAllowed()); + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/ranger/authorization/TestRangerKuduAuthorizer.java b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/ranger/authorization/TestRangerKuduAuthorizer.java new file mode 100644 index 0000000000..3b6a4266ef --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/test/java/org/apache/kudu/subprocess/ranger/authorization/TestRangerKuduAuthorizer.java @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.subprocess.ranger.authorization; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.apache.ranger.plugin.model.RangerServiceDef; +import org.apache.ranger.plugin.policyengine.RangerAccessRequest; +import org.apache.ranger.plugin.policyengine.RangerAccessRequestImpl; +import org.apache.ranger.plugin.policyengine.RangerAccessResult; +import org.apache.ranger.plugin.service.RangerBasePlugin; +import org.junit.Rule; +import org.junit.Test; +import org.mockito.Mockito; + +import org.apache.kudu.ranger.Ranger; +import org.apache.kudu.test.junit.RetryRule; + +/** + * Tests for the Ranger authorizer. + */ +public class TestRangerKuduAuthorizer { + + @Rule + public RetryRule retryRule = new RetryRule(); + + /** + * Generates a few ranger authorization results and verifies the + * Ranger authorizer work as expected. + */ + @Test + public void testBasicRangerAuthorizer() { + RangerKuduAuthorizer authz = new RangerKuduAuthorizer(); + authz.plugin = Mockito.mock(RangerBasePlugin.class); + // We have to mock RangerAccessRequestImpl as it does not implement equals(). + // Mock with a positive authz result. + RangerAccessRequestImpl mockUpdateRequest = Mockito.mock(RangerAccessRequestImpl.class); + final RangerAccessResult updateResult = new RangerAccessResult( + /* policyType= */1, "kudu", + new RangerServiceDef(), mockUpdateRequest); + updateResult.setIsAllowed(true); + + // Mock with a negative authz result. 
+ RangerAccessRequestImpl mockCreateRequest = Mockito.mock(RangerAccessRequestImpl.class); + final RangerAccessResult createResult = new RangerAccessResult( + /* policyType= */1, "kudu", + new RangerServiceDef(), mockCreateRequest); + createResult.setIsAllowed(false); + + Mockito.when(authz.plugin.isAccessAllowed(Mockito.any(RangerAccessRequest.class))) + .thenReturn(updateResult, createResult); + + Ranger.RangerRequestListPB rangerRequests = Ranger.RangerRequestListPB.newBuilder() + .addRequests(Ranger.RangerRequestPB.newBuilder().build()) + .addRequests(Ranger.RangerRequestPB.newBuilder().build()) + .setUser("jdoe") + .build(); + + List actualResultsIter = authz.authorize(rangerRequests) + .getResponsesList(); + assertTrue(actualResultsIter.get(0).getAllowed()); + assertFalse(actualResultsIter.get(1).getAllowed()); + } +} \ No newline at end of file diff --git a/java-scala-spark4/kudu-subprocess/src/test/resources/log4j2.properties b/java-scala-spark4/kudu-subprocess/src/test/resources/log4j2.properties new file mode 100644 index 0000000000..22762a1560 --- /dev/null +++ b/java-scala-spark4/kudu-subprocess/src/test/resources/log4j2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT + +logger.kudu.name = org.apache.kudu +logger.kudu.level = debug diff --git a/java-scala-spark4/kudu-test-utils/build.gradle b/java-scala-spark4/kudu-test-utils/build.gradle new file mode 100644 index 0000000000..a4a23e4b7c --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/build.gradle @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
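NOTE: The TestRangerKuduAuthorizer test above relies on Mockito's consecutive stubbing: a
varargs thenReturn(a, b) makes the first matching call return a and the next call return b,
which is how a single when(...) clause yields one allowed and one denied result. Below is a
minimal sketch of that mechanism; ConsecutiveStubbingSketch and the string values are
hypothetical and not part of this patch.

[source,java]
----
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.function.Supplier;

class ConsecutiveStubbingSketch {
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    // One stub, two queued answers: calls consume them in order.
    Supplier<String> decisions = mock(Supplier.class);
    when(decisions.get()).thenReturn("ALLOWED", "DENIED");

    System.out.println(decisions.get()); // ALLOWED
    System.out.println(decisions.get()); // DENIED
    System.out.println(decisions.get()); // DENIED (the last stubbed value repeats)
  }
}
----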
+ +apply from: "$rootDir/gradle/shadow.gradle" + +dependencies { + compile project(path: ":kudu-client") + compile libs.commonsIo + compile libs.guava + compile libs.httpClient + compile libs.httpMime + compile libs.osdetector + + compileUnshaded libs.hamcrest + compileUnshaded(libs.junit) { + // Use the Kudu specified hamcrest. + exclude group: "org.hamcrest" + } + compileUnshaded libs.slf4jApi + + // Support using any kudu-binary jar for tests via `-PuseBinJar=`. + if (propertyExists("useBinJar")) { + apply plugin: "com.google.osdetector" + def jarVersion = propertyWithDefault("useBinJar", project.version) + runtime "org.apache.kudu:kudu-binary:$jarVersion:${osdetector.classifier}" + } + + // Needed for CapturingLogAppender. Optional otherwise. + optional libs.log4jApi + optional libs.log4jCore + optional libs.log4jSlf4jImpl + + optional libs.jsr305 + optional libs.yetusAnnotations + + testCompile libs.jetty + testCompile libs.jettyServlet +} + +// kudu-test-utils has no public Javadoc. +javadoc { + enabled = false +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/CapturingLogAppender.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/CapturingLogAppender.java new file mode 100644 index 0000000000..541ffdd70f --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/CapturingLogAppender.java @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Random; +import javax.annotation.concurrent.GuardedBy; + +import com.google.common.base.Throwables; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.core.layout.PatternLayout; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Test utility which wraps Log4j and captures all messages logged + * while it is attached. This can be useful for asserting that a particular + * message is (or is not) logged. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class CapturingLogAppender extends AbstractAppender { + // This is the standard layout used in Kudu tests. + private static final PatternLayout LAYOUT = PatternLayout.newBuilder() + .withPattern("%d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n") + .build(); + + private static final Random RANDOM = new Random(); + + // The caller should detach the logger before calling getAppendedText(). 
+ // Nevertheless, for some reason it is still possible for additional + // append() calls to happen _after_ the logger is detached, which may race + // with getAppendedText(). + @GuardedBy("this") + private StringBuilder appended = new StringBuilder(); + + public CapturingLogAppender() { + // Appender name must be unique so that attaching/detaching works correctly + // when multiple capturing appenders are used recursively. + super(String.format("CapturingToFileLogAppender-%d", RANDOM.nextInt()), + /* filter */ null, LAYOUT, /* ignoreExceptions */ true, Property.EMPTY_ARRAY); + + // If we don't call start(), we get an ugly log error: + // + // ERROR Attempted to append to non-started appender CapturingToFileLogAppender + start(); + } + + @Override + public synchronized void append(LogEvent event) { + appended.append(getLayout().toSerializable(event)); + if (event.getThrown() != null) { + appended.append(Throwables.getStackTraceAsString(event.getThrown())); + appended.append("\n"); + } + } + + /** + * @return all of the appended messages captured thus far, joined together. + */ + public synchronized String getAppendedText() { + return appended.toString(); + } + + /** + * Temporarily attach the capturing appender to the Log4j root logger. + * This can be used in a 'try-with-resources' block: + * + * try (Closeable c = capturer.attach()) { + * ... + * } + * + */ + public Closeable attach() { + LoggerContext.getContext(false).getRootLogger().addAppender(this); + return new Closeable() { + @Override + public void close() throws IOException { + LoggerContext.getContext(false).getRootLogger().removeAppender(CapturingLogAppender.this); + } + }; + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/CapturingToFileLogAppender.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/CapturingToFileLogAppender.java new file mode 100644 index 0000000000..6f3da241d4 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/CapturingToFileLogAppender.java @@ -0,0 +1,200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
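NOTE: A short usage sketch of the CapturingLogAppender defined above, following the
try-with-resources pattern described in its attach() Javadoc. The class and message names
below are hypothetical, and routing of slf4j calls into log4j2 (via the optional
log4jSlf4jImpl dependency listed in this module's build file) is assumed.

[source,java]
----
import java.io.Closeable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.kudu.test.CapturingLogAppender;

class CapturingLogAppenderUsageSketch {
  private static final Logger LOG =
      LoggerFactory.getLogger(CapturingLogAppenderUsageSketch.class);

  static boolean warningWasLogged() throws Exception {
    CapturingLogAppender capturer = new CapturingLogAppender();
    // While attached, every event reaching the log4j2 root logger is captured.
    try (Closeable c = capturer.attach()) {
      LOG.warn("tablet server went away");
    }
    // After detaching, the captured output can be inspected as one string.
    return capturer.getAppendedText().contains("tablet server went away");
  }
}
----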
+ +package org.apache.kudu.test; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.BufferedWriter; +import java.io.Closeable; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.Random; +import java.util.zip.GZIPOutputStream; + +import com.google.common.base.Throwables; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.core.layout.PatternLayout; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Test utility which wraps Log4j and captures all messages logged while + * attached, storing them in an (optionally gzipped) temporary file. + * + * The typical lifecycle is as follows: + * + * constructor: temporary file is created and opened. + * append(): a new log event is captured. It may or may not be flushed to disk. + * finish(): all events previously captured in append() are now guaranteed to + * be on disk and visible to readers. No more events may be appended. + * close(): the temporary file is deleted. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class CapturingToFileLogAppender extends AbstractAppender implements AutoCloseable { + // This is the standard layout used in Kudu tests. + private static final PatternLayout LAYOUT = PatternLayout.newBuilder() + .withPattern("%d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n") + .build(); + + private static final Random RANDOM = new Random(); + + private File outputFile; + private Writer outputFileWriter; + + /** + * Creates a new appender. The temporary file is created immediately; it may + * be obtained via getOutputFile(). + * + * Appended messages are buffered; they must be flushed to disk via finish(). + * + * @param useGzip whether to gzip-compress messages when appended + */ + public CapturingToFileLogAppender(boolean useGzip) throws IOException { + // Appender name must be unique so that attaching/detaching works correctly + // when multiple capturing appenders are used recursively. + super(String.format("CapturingToFileLogAppender-%d", RANDOM.nextInt()), + /* filter */ null, LAYOUT, /* ignoreExceptions */ true, Property.EMPTY_ARRAY); + + outputFile = File.createTempFile("captured_output", ".txt.gz"); + try { + OutputStream os = createOutputStream(useGzip); + try { + // As per the recommendation in OutputStreamWriter's Javadoc, we wrap in + // a BufferedWriter to buffer up character conversions. + outputFileWriter = new BufferedWriter(new OutputStreamWriter(os, UTF_8)); + } catch (Throwable t) { + os.close(); + } + } catch (Throwable t) { + outputFile.delete(); + throw t; + } + + // If we don't call start(), we get an ugly log error: + // + // ERROR Attempted to append to non-started appender CapturingToFileLogAppender + // + // It doesn't throw anything so there's no reason to include it in the above + // try/catch monstrosity. 
+ start(); + } + + private OutputStream createOutputStream(boolean useGzip) throws IOException { + OutputStream os = new FileOutputStream(outputFile.getPath()); + if (useGzip) { + try { + os = new GZIPOutputStream(os); + } catch (IOException ex) { + os.close(); + throw ex; + } + } + return os; + } + + @Override + public void close() { + // Just do the cleanup; we don't care about exceptions/logging. + if (outputFileWriter != null) { + try { + outputFileWriter.close(); + } catch (final IOException ioe) { + // ignored + } + outputFileWriter = null; + } + if (outputFile != null) { + outputFile.delete(); + outputFile = null; + } + } + + @Override + public void append(LogEvent event) { + assert outputFileWriter != null; + try { + outputFileWriter.write(LAYOUT.toSerializable(event)); + if (event.getThrown() != null) { + outputFileWriter.write(Throwables.getStackTraceAsString(event.getThrown())); + outputFileWriter.write("\n"); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Flushes any buffered appended events to the on-disk temporary file and + * closes it. + * + * After calling this function, all appended events will be visible to new + * readers. + * + * @throws IOException if close failed + */ + public void finish() throws IOException { + // As per the Writer contract, this will also flush the output stream as + // well as the compressor (if gzip-compression is used). + // + // Why close() and not flush()? It turns out to be remarkably hard to + // flush a GZIPOutputStream [1]. At the very least it also requires calling + // finish(), which is not a generic OutputStream method. But for our use + // case (multiple append() calls followed by a single file access) it's + // easier to just close() when we're done appending. + // + // 1. https://stackoverflow.com/questions/3640080/force-flush-on-a-gzipoutputstream-in-java + // + outputFileWriter.close(); + outputFileWriter = null; + } + + /** + * @return the temporary file opened in the appender's constructor + */ + public File getOutputFile() { + return outputFile; + } + + /** + * Temporarily attach the capturing appender to the Log4j root logger. + * This can be used in a 'try-with-resources' block: + * + * try (Closeable c = capturer.attach()) { + * ... + * } + * + */ + public Closeable attach() { + LoggerContext.getContext(false).getRootLogger().addAppender(this); + return new Closeable() { + @Override + public void close() throws IOException { + LoggerContext.getContext(false).getRootLogger() + .removeAppender(CapturingToFileLogAppender.this); + } + }; + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/ClientTestUtil.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/ClientTestUtil.java new file mode 100644 index 0000000000..72b7a26908 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/ClientTestUtil.java @@ -0,0 +1,509 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
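A rough usage sketch following the lifecycle described in the class Javadoc above: construct, attach, log, call finish(), then read the temporary file before close() deletes it. The class name, logger name, and gzip choice are illustrative only.

[source,java]
----
import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;

import org.slf4j.LoggerFactory;

import org.apache.kudu.test.CapturingToFileLogAppender;

public class ExampleCapturingToFileUsage {
  public static void main(String[] args) throws Exception {
    // close() deletes the temporary file, so keep the appender in try-with-resources.
    try (CapturingToFileLogAppender capturer =
             new CapturingToFileLogAppender(/* useGzip= */ true)) {
      try (Closeable attached = capturer.attach()) {
        LoggerFactory.getLogger("example").error("captured to a temp file");
      }
      // Flush and close the underlying writer; the file is now visible to readers.
      capturer.finish();
      try (BufferedReader reader = new BufferedReader(new InputStreamReader(
          new GZIPInputStream(new FileInputStream(capturer.getOutputFile())), UTF_8))) {
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
          System.out.println(line);
        }
      }
    }
  }
}
----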
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.ColumnTypeAttributes; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.AsyncKuduClient; +import org.apache.kudu.client.AsyncKuduScanner; +import org.apache.kudu.client.AsyncKuduSession; +import org.apache.kudu.client.CreateTableOptions; +import org.apache.kudu.client.Insert; +import org.apache.kudu.client.KuduClient; +import org.apache.kudu.client.KuduException; +import org.apache.kudu.client.KuduPredicate; +import org.apache.kudu.client.KuduScanToken; +import org.apache.kudu.client.KuduScanner; +import org.apache.kudu.client.KuduSession; +import org.apache.kudu.client.KuduTable; +import org.apache.kudu.client.PartialRow; +import org.apache.kudu.client.RowResult; +import org.apache.kudu.client.RowResultIterator; +import org.apache.kudu.client.Upsert; +import org.apache.kudu.util.CharUtil; +import org.apache.kudu.util.DateUtil; +import org.apache.kudu.util.DecimalUtil; + +/** + * Utilities useful for cluster testing. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public abstract class ClientTestUtil { + + private static final Logger LOG = LoggerFactory.getLogger(ClientTestUtil.class); + + public static final Callback defaultErrorCB = new Callback() { + @Override + public Object call(Object arg) throws Exception { + if (arg == null) { + return null; + } + if (arg instanceof Exception) { + LOG.warn("Got exception", (Exception) arg); + } else { + LOG.warn("Got an error response back {}", arg); + } + return new Exception("cannot recover from error: " + arg); + } + }; + + /** + * Counts the rows from the {@code scanner} until exhaustion. It doesn't require the scanner to + * be new, so it can be used to finish scanning a previously-started scan. 
+ */ + public static int countRowsInScan(AsyncKuduScanner scanner, long timeoutMs) throws Exception { + final AtomicInteger counter = new AtomicInteger(); + + Callback cb = new Callback() { + @Override + public Object call(RowResultIterator arg) throws Exception { + if (arg == null) { + return null; + } + counter.addAndGet(arg.getNumRows()); + return null; + } + }; + + while (scanner.hasMoreRows()) { + Deferred data = scanner.nextRows(); + data.addCallbacks(cb, defaultErrorCB); + data.join(timeoutMs); + } + return counter.get(); + } + + /** + * Same as {@link #countRowsInScan(AsyncKuduScanner, long)}, but defaults the timeout to 60 + * seconds. + */ + public static int countRowsInScan(AsyncKuduScanner scanner) throws Exception { + return countRowsInScan(scanner, 60000); + } + + public static int countRowsInScan(KuduScanner scanner) throws KuduException { + int counter = 0; + while (scanner.hasMoreRows()) { + counter += scanner.nextRows().getNumRows(); + } + return counter; + } + + /** + * Scans the table and returns the number of rows. + * @param table the table + * @param predicates optional predicates to apply to the scan + * @return the number of rows in the table matching the predicates + */ + public static long countRowsInTable(KuduTable table, KuduPredicate... predicates) + throws KuduException { + KuduScanner.KuduScannerBuilder scanBuilder = + table.getAsyncClient().syncClient().newScannerBuilder(table); + for (KuduPredicate predicate : predicates) { + scanBuilder.addPredicate(predicate); + } + scanBuilder.setProjectedColumnIndexes(ImmutableList.of()); + return countRowsInScan(scanBuilder.build()); + } + + /** + * Counts the rows in the provided scan tokens. + */ + public static int countScanTokenRows(List tokens, final String masterAddresses, + final long operationTimeoutMs) + throws IOException, InterruptedException { + final AtomicInteger count = new AtomicInteger(0); + List threads = new ArrayList<>(); + for (final KuduScanToken token : tokens) { + final byte[] serializedToken = token.serialize(); + Thread thread = new Thread(new Runnable() { + @Override + public void run() { + try (KuduClient contextClient = new KuduClient.KuduClientBuilder(masterAddresses) + .defaultAdminOperationTimeoutMs(operationTimeoutMs) + .build()) { + KuduScanner scanner = + KuduScanToken.deserializeIntoScanner(serializedToken, contextClient); + try { + int localCount = 0; + while (scanner.hasMoreRows()) { + localCount += Iterators.size(scanner.nextRows()); + } + count.addAndGet(localCount); + } finally { + scanner.close(); + } + } catch (Exception e) { + LOG.error("exception in parallel token scanner", e); + } + } + }); + thread.start(); + threads.add(thread); + } + + for (Thread thread : threads) { + thread.join(); + } + return count.get(); + } + + public static List scanTableToStrings(KuduTable table, + KuduPredicate... 
predicates) throws Exception { + List rowStrings = Lists.newArrayList(); + KuduScanner.KuduScannerBuilder scanBuilder = + table.getAsyncClient().syncClient().newScannerBuilder(table); + for (KuduPredicate predicate : predicates) { + scanBuilder.addPredicate(predicate); + } + KuduScanner scanner = scanBuilder.build(); + for (RowResult r : scanner) { + rowStrings.add(r.rowToString()); + } + Collections.sort(rowStrings); + return rowStrings; + } + + public static Schema getSchemaWithAllTypes() { + List columns = + ImmutableList.of( + new ColumnSchema.ColumnSchemaBuilder("int8", Type.INT8).key(true).build(), + new ColumnSchema.ColumnSchemaBuilder("int16", Type.INT16).build(), + new ColumnSchema.ColumnSchemaBuilder("int32", Type.INT32).build(), + new ColumnSchema.ColumnSchemaBuilder("int64", Type.INT64).build(), + new ColumnSchema.ColumnSchemaBuilder("bool", Type.BOOL).build(), + new ColumnSchema.ColumnSchemaBuilder("float", Type.FLOAT).build(), + new ColumnSchema.ColumnSchemaBuilder("double", Type.DOUBLE).build(), + new ColumnSchema.ColumnSchemaBuilder("string", Type.STRING).build(), + new ColumnSchema.ColumnSchemaBuilder("binary-array", Type.BINARY).build(), + new ColumnSchema.ColumnSchemaBuilder("binary-bytebuffer", Type.BINARY).build(), + new ColumnSchema.ColumnSchemaBuilder("null", Type.STRING).nullable(true).build(), + new ColumnSchema.ColumnSchemaBuilder("timestamp", Type.UNIXTIME_MICROS).build(), + new ColumnSchema.ColumnSchemaBuilder("decimal", Type.DECIMAL) + .typeAttributes(DecimalUtil.typeAttributes(5, 3)).build(), + new ColumnSchema.ColumnSchemaBuilder("varchar", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(10)).build(), + new ColumnSchema.ColumnSchemaBuilder("date", Type.DATE).build()); + + return new Schema(columns); + } + + public static PartialRow getPartialRowWithAllTypes() { + Schema schema = getSchemaWithAllTypes(); + // Ensure we aren't missing any types + assertEquals(15, schema.getColumnCount()); + + PartialRow row = schema.newPartialRow(); + row.addByte("int8", (byte) 42); + row.addShort("int16", (short) 43); + row.addInt("int32", 44); + row.addLong("int64", 45); + row.addTimestamp("timestamp", new Timestamp(1234567890)); + row.addDate("date", DateUtil.epochDaysToSqlDate(0)); + row.addBoolean("bool", true); + row.addFloat("float", 52.35F); + row.addDouble("double", 53.35); + row.addString("string", "fun with ütf\0"); + row.addVarchar("varchar", "árvíztűrő tükörfúrógép"); + row.addBinary("binary-array", new byte[] { 0, 1, 2, 3, 4 }); + ByteBuffer binaryBuffer = ByteBuffer.wrap(new byte[] { 5, 6, 7, 8, 9 }); + row.addBinary("binary-bytebuffer", binaryBuffer); + row.setNull("null"); + row.addDecimal("decimal", BigDecimal.valueOf(12345, 3)); + return row; + } + + public static CreateTableOptions getAllTypesCreateTableOptions() { + return new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("int8")); + } + + public static Schema getBasicSchema() { + ArrayList columns = new ArrayList<>(5); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column1_i", Type.INT32).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column2_i", Type.INT32).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column3_s", Type.STRING) + .nullable(true) + .desiredBlockSize(4096) + .encoding(ColumnSchema.Encoding.DICT_ENCODING) + .compressionAlgorithm(ColumnSchema.CompressionAlgorithm.LZ4) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column4_b", 
Type.BOOL).build()); + return new Schema(columns); + } + + public static CreateTableOptions getBasicCreateTableOptions() { + return new CreateTableOptions().setRangePartitionColumns(ImmutableList.of("key")); + } + + /** + * Creates table options with non-covering range partitioning for a table with + * the basic schema. Range partition key ranges fall between the following values: + * + * [ 0, 50) + * [ 50, 100) + * [200, 300) + */ + public static CreateTableOptions getBasicTableOptionsWithNonCoveredRange() { + Schema schema = getBasicSchema(); + CreateTableOptions option = new CreateTableOptions(); + option.setRangePartitionColumns(ImmutableList.of("key")); + + PartialRow lowerBoundA = schema.newPartialRow(); + lowerBoundA.addInt("key", 0); + PartialRow upperBoundA = schema.newPartialRow(); + upperBoundA.addInt("key", 100); + option.addRangePartition(lowerBoundA, upperBoundA); + + PartialRow lowerBoundB = schema.newPartialRow(); + lowerBoundB.addInt("key", 200); + PartialRow upperBoundB = schema.newPartialRow(); + upperBoundB.addInt("key", 300); + option.addRangePartition(lowerBoundB, upperBoundB); + + PartialRow split = schema.newPartialRow(); + split.addInt("key", 50); + option.addSplitRow(split); + return option; + } + + /** + * A generic helper function to create a table with default test options. + */ + public static KuduTable createDefaultTable(KuduClient client, String tableName) + throws KuduException { + return client.createTable(tableName, getBasicSchema(), getBasicCreateTableOptions()); + } + + /** + * Load a table of default schema with the specified number of records, in ascending key order. + */ + public static void loadDefaultTable(KuduClient client, String tableName, int numRows) + throws KuduException { + KuduTable table = client.openTable(tableName); + KuduSession session = client.newSession(); + for (int i = 0; i < numRows; i++) { + Insert insert = createBasicSchemaInsert(table, i); + session.apply(insert); + } + session.flush(); + session.close(); + } + + public static Upsert createBasicSchemaUpsert(KuduTable table, int key) { + Upsert upsert = table.newUpsert(); + PartialRow row = upsert.getRow(); + row.addInt(0, key); + row.addInt(1, 3); + row.addInt(2, 4); + row.addString(3, "another string"); + row.addBoolean(4, false); + return upsert; + } + + public static Upsert createBasicSchemaUpsertWithDataSize(KuduTable table, int key, int dataSize) { + Upsert upsert = table.newUpsert(); + PartialRow row = upsert.getRow(); + row.addInt(0, key); + row.addInt(1, 3); + row.addInt(2, 4); + + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < dataSize; i++) { + builder.append("*"); + } + String val = builder.toString(); + row.addString(3, val); + row.addBoolean(4, false); + return upsert; + } + + public static Insert createBasicSchemaInsert(KuduTable table, int key) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt(0, key); + row.addInt(1, 2); + row.addInt(2, 3); + row.addString(3, "a string"); + row.addBoolean(4, true); + return insert; + } + + public static KuduTable createFourTabletsTableWithNineRows(AsyncKuduClient client, + String tableName, + final long timeoutMs) + throws Exception { + final int[] KEYS = new int[] { 10, 20, 30 }; + final Schema basicSchema = getBasicSchema(); + CreateTableOptions builder = getBasicCreateTableOptions(); + for (int i : KEYS) { + PartialRow splitRow = basicSchema.newPartialRow(); + splitRow.addInt(0, i); + builder.addSplitRow(splitRow); + } + KuduTable table = 
client.syncClient().createTable(tableName, basicSchema, builder); + AsyncKuduSession session = client.newSession(); + + // create a table with on empty tablet and 3 tablets of 3 rows each + for (int key1 : KEYS) { + for (int key2 = 1; key2 <= 3; key2++) { + Insert insert = table.newInsert(); + PartialRow row = insert.getRow(); + row.addInt(0, key1 + key2); + row.addInt(1, key1); + row.addInt(2, key2); + row.addString(3, "a string"); + row.addBoolean(4, true); + session.apply(insert).join(timeoutMs); + } + } + session.close().join(timeoutMs); + return table; + } + + public static KuduTable createTableWithOneThousandRows(AsyncKuduClient client, + String tableName, + final int rowDataSize, + final long timeoutMs) + throws Exception { + final int[] KEYS = new int[] { 250, 500, 750 }; + final Schema basicSchema = getBasicSchema(); + CreateTableOptions builder = getBasicCreateTableOptions(); + for (int i : KEYS) { + PartialRow splitRow = basicSchema.newPartialRow(); + splitRow.addInt(0, i); + builder.addSplitRow(splitRow); + } + KuduTable table = client.syncClient().createTable(tableName, basicSchema, builder); + AsyncKuduSession session = client.newSession(); + + // create a table with on 4 tablets of 250 rows each + for (int key = 0; key < 1000; key++) { + Upsert upsert = createBasicSchemaUpsertWithDataSize(table, key, rowDataSize); + session.apply(upsert).join(timeoutMs); + } + session.close().join(timeoutMs); + return table; + } + + public static Schema createManyVarcharsSchema() { + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(10)).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(10)).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(10)).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c3", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(10)).nullable(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c4", Type.VARCHAR) + .typeAttributes(CharUtil.typeAttributes(10)).nullable(true).build()); + return new Schema(columns); + } + + public static Schema createManyStringsSchema() { + ArrayList columns = new ArrayList<>(4); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.STRING).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.STRING).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.STRING).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c3", Type.STRING).nullable(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c4", Type.STRING).nullable(true).build()); + return new Schema(columns); + } + + public static Schema createSchemaWithBinaryColumns() { + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.BINARY).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.STRING).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c2", Type.DOUBLE).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c3", Type.BINARY).nullable(true).build()); + return new Schema(columns); + } + + public static Schema createSchemaWithTimestampColumns() { + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.UNIXTIME_MICROS) + .key(true).build()); + columns.add(new 
ColumnSchema.ColumnSchemaBuilder("c1", Type.UNIXTIME_MICROS) + .nullable(true).build()); + return new Schema(columns); + } + + public static Schema createSchemaWithDateColumns() { + ArrayList columns = new ArrayList(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.DATE).key(true).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.DATE).nullable(true).build()); + return new Schema(columns); + } + + public static Schema createSchemaWithDecimalColumns() { + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.DECIMAL).key(true) + .typeAttributes( + new ColumnTypeAttributes.ColumnTypeAttributesBuilder() + .precision(DecimalUtil.MAX_DECIMAL64_PRECISION).build() + ).build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.DECIMAL).nullable(true) + .typeAttributes( + new ColumnTypeAttributes.ColumnTypeAttributesBuilder() + .precision(DecimalUtil.MAX_DECIMAL128_PRECISION).build() + ).build()); + return new Schema(columns); + } + + public static Schema createSchemaWithImmutableColumns() { + List columns = new ArrayList<>(ClientTestUtil.getBasicSchema().getColumns()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("column5_i", Type.INT32) + .nullable(true).immutable(true).build()); + return new Schema(columns); + } + + public static Schema createSchemaWithNonUniqueKey() { + ArrayList columns = new ArrayList<>(); + columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).nonUniqueKey(true) + .build()); + columns.add(new ColumnSchema.ColumnSchemaBuilder("c1", Type.INT32).nullable(true) + .build()); + return new Schema(columns); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/KuduTestHarness.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/KuduTestHarness.java new file mode 100644 index 0000000000..b8d01621e3 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/KuduTestHarness.java @@ -0,0 +1,598 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
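Taken together these helpers keep cluster tests short. A hypothetical sketch, assuming an already-connected KuduClient for a running cluster; the table name and wrapper method are illustrative only.

[source,java]
----
import static org.junit.Assert.assertEquals;

import java.util.List;

import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduPredicate;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.test.ClientTestUtil;

public class ExampleClientTestUtilUsage {
  static void exercise(KuduClient client) throws Exception {
    // Create a table with the basic schema and default table options.
    KuduTable table = ClientTestUtil.createDefaultTable(client, "example_table");

    // Insert 100 rows with keys 0..99 in ascending order.
    ClientTestUtil.loadDefaultTable(client, "example_table", 100);

    // Count every row, then only the rows whose key is >= 50.
    assertEquals(100, ClientTestUtil.countRowsInTable(table));
    KuduPredicate pred = KuduPredicate.newComparisonPredicate(
        table.getSchema().getColumn("key"), KuduPredicate.ComparisonOp.GREATER_EQUAL, 50);
    assertEquals(50, ClientTestUtil.countRowsInTable(table, pred));

    // Or materialize the matching rows as sorted strings.
    List<String> rows = ClientTestUtil.scanTableToStrings(table, pred);
    assertEquals(50, rows.size());
  }
}
----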
+ +package org.apache.kudu.test; + +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.io.InputStream; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.util.List; +import java.util.Random; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.junit.rules.ExternalResource; +import org.junit.runner.Description; +import org.junit.runners.model.Statement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.client.AsyncKuduClient; +import org.apache.kudu.client.AsyncKuduClient.AsyncKuduClientBuilder; +import org.apache.kudu.client.HostAndPort; +import org.apache.kudu.client.KuduClient; +import org.apache.kudu.client.KuduException; +import org.apache.kudu.client.KuduMetrics; +import org.apache.kudu.client.KuduTable; +import org.apache.kudu.client.LocatedTablet; +import org.apache.kudu.client.RemoteTablet; +import org.apache.kudu.client.TimeoutTracker; +import org.apache.kudu.test.cluster.FakeDNS; +import org.apache.kudu.test.cluster.MiniKuduCluster; +import org.apache.kudu.test.cluster.MiniKuduCluster.MiniKuduClusterBuilder; +import org.apache.kudu.test.junit.RetryRule; + +/** + * A Junit Rule that manages a Kudu cluster and clients for testing. + * This rule also includes utility methods for the cluster + * and clients. + * + *
+ * public static class TestFoo {
+ *
+ *  @Rule
+ *  public KuduTestHarness harness = new KuduTestHarness();
+ *
+ *  ...
+ * }
+ * 
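+ *
+ * A slightly fuller sketch (hypothetical test; the table name and row count
+ * are illustrative only):
+ *
+ *  @Test
+ *  public void testCreateAndCount() throws Exception {
+ *    KuduClient client = harness.getClient();
+ *    KuduTable table = ClientTestUtil.createDefaultTable(client, "my_table");
+ *    ClientTestUtil.loadDefaultTable(client, "my_table", 10);
+ *    assertEquals(10, ClientTestUtil.countRowsInTable(table));
+ *  }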
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class KuduTestHarness extends ExternalResource { + + private static final Logger LOG = LoggerFactory.getLogger(KuduTestHarness.class); + + private static final int NUM_MASTER_SERVERS = 3; + private static final int NUM_TABLET_SERVERS = 3; + + // Default timeout/sleep interval for various client operations, + // waiting for various jobs/threads to complete, etc. + public static final int DEFAULT_SLEEP = 50000; + + private final Random randomForTSRestart = RandomUtils.getRandom(); + + private MiniKuduClusterBuilder clusterBuilder; + private MiniKuduCluster miniCluster; + + // We create both versions of the asyncClient for ease of use. + private AsyncKuduClient asyncClient; + private KuduClient client; + + public KuduTestHarness(final MiniKuduClusterBuilder clusterBuilder) { + this.clusterBuilder = clusterBuilder; + } + + public KuduTestHarness() { + this.clusterBuilder = getBaseClusterBuilder(); + } + + /** + * Returns the base MiniKuduClusterBuilder used when creating a + * KuduTestHarness with the default constructor. This is useful + * if you want to add to the default cluster setup. + */ + public static MiniKuduClusterBuilder getBaseClusterBuilder() { + return new MiniKuduClusterBuilder() + .numMasterServers(NUM_MASTER_SERVERS) + .numTabletServers(NUM_TABLET_SERVERS); + } + + @Override + public Statement apply(Statement base, Description description) { + // Set any master server flags defined in the method level annotation. + MasterServerConfig masterServerConfig = description.getAnnotation(MasterServerConfig.class); + if (masterServerConfig != null) { + for (String flag : masterServerConfig.flags()) { + clusterBuilder.addMasterServerFlag(flag); + } + } + // Pass through any location mapping defined in the method level annotation. + LocationConfig locationConfig = description.getAnnotation(LocationConfig.class); + if (locationConfig != null) { + for (String location : locationConfig.locations()) { + clusterBuilder.addLocation(location); + } + } + // Set any tablet server flags defined in the method level annotation. + TabletServerConfig tabletServerConfig = description.getAnnotation(TabletServerConfig.class); + if (tabletServerConfig != null) { + for (String flag : tabletServerConfig.flags()) { + clusterBuilder.addTabletServerFlag(flag); + } + } + + // Enable Kerberos if needed and set the SPN. + EnableKerberos enableKerberos = description.getAnnotation(EnableKerberos.class); + if (enableKerberos != null) { + clusterBuilder.enableKerberos(); + clusterBuilder.principal(enableKerberos.principal()); + } + + // Generate the ExternalResource Statement. + Statement statement = super.apply(base, description); + // Wrap in the RetryRule to rerun flaky tests. + return new RetryRule().apply(statement, description); + } + + @Override + public void before() throws Exception { + FakeDNS.getInstance().install(); + // Enable the client metrics for tests. 
+ KuduMetrics.setEnabled(true); + LOG.info("Creating a new MiniKuduCluster..."); + miniCluster = clusterBuilder.build(); + LOG.info("Creating a new Kudu client..."); + asyncClient = new AsyncKuduClientBuilder(miniCluster.getMasterAddressesAsString()) + .defaultAdminOperationTimeoutMs(DEFAULT_SLEEP) + .saslProtocolName(miniCluster.getPrincipal()) + .build(); + client = asyncClient.syncClient(); + } + + @Override + public void after() { + try { + if (client != null) { + client.shutdown(); + // No need to explicitly shutdown the async client, + // shutting down the sync client effectively does that. + } + } catch (KuduException e) { + LOG.warn("Error while shutting down the test client", e); + } finally { + if (miniCluster != null) { + miniCluster.shutdown(); + } + } + } + + public KuduClient getClient() { + return client; + } + + public AsyncKuduClient getAsyncClient() { + return asyncClient; + } + + /** + * Helper method to easily kill a tablet server that serves the given table's only tablet's + * leader. The currently running test case will be failed if there's more than one tablet, + * if the tablet has no leader after some retries, or if the tablet server was already killed. + * + * This method is thread-safe. + * @param table a KuduTable which will get its single tablet's leader killed. + * @throws Exception + */ + @SuppressWarnings("deprecation") + public void killTabletLeader(KuduTable table) throws Exception { + List tablets = table.getTabletsLocations(DEFAULT_SLEEP); + if (tablets.size() != 1) { + fail("Currently only support killing leaders for tables containing 1 tablet, table " + + table.getName() + " has " + tablets.size()); + } + LocatedTablet tablet = tablets.get(0); + if (tablet.getReplicas().size() == 1) { + fail("Table " + table.getName() + " only has 1 tablet, please enable replication"); + } + + HostAndPort hp = findLeaderTabletServer(tablet); + miniCluster.killTabletServer(hp); + } + + /** + * Helper method to kill a tablet server that hosts the given tablet's leader + * replica. + * This method is thread-safe. + * + * @param tablet a RemoteTablet which will get its leader killed + * @return the host and port of the tablet server which hosted the tablet's + * leader replica + * @throws Exception if no leader replica found after a few retries, + * or if the tablet server isn't running + */ + public HostAndPort killTabletLeader(RemoteTablet tablet) throws Exception { + return killTabletLeader(new LocatedTablet(tablet)); + } + + /** + * Helper method to kill a tablet server that serves the given tablet's leader + * replica. + * + * This method is thread-safe. + * @param tablet a LocatedTablet which will get its leader killed + * @return the host and port of the tablet server which hosted the tablet's + * leader replica + * @throws Exception if no leader replica found or if the tablet server isn't + * running + */ + public HostAndPort killTabletLeader(LocatedTablet tablet) throws Exception { + HostAndPort hp = findLeaderTabletServer(tablet); + miniCluster.killTabletServer(hp); + return hp; + } + + /** + * Finds the RPC port of the given tablet's leader tserver. 
+ * @param tablet a LocatedTablet + * @return the host and port of the given tablet's leader tserver + * @throws Exception if unable to find a tablet server with leader replica + */ + public HostAndPort findLeaderTabletServer(LocatedTablet tablet) + throws Exception { + LocatedTablet.Replica leader = null; + TimeoutTracker timeoutTracker = new TimeoutTracker(); + timeoutTracker.setTimeout(DEFAULT_SLEEP); + while (leader == null) { + if (timeoutTracker.timedOut()) { + fail("Timed out while trying to find a leader for this table"); + } + + leader = tablet.getLeaderReplica(); + if (leader == null) { + LOG.info("Sleeping while waiting for a tablet LEADER to arise, currently slept {} ms", + timeoutTracker.getElapsedMillis()); + Thread.sleep(50); + } + } + return new HostAndPort(leader.getRpcHost(), leader.getRpcPort()); + } + + /** + * Start tablet server which has previously been registered at the specified + * host and port. + * + * @param hp host and port of the tablet server to start back + * @throws Exception + */ + public void startTabletServer(HostAndPort hp) throws Exception { + miniCluster.startTabletServer(hp); + } + + /** + * Find the host and port of the leader master. + * @return the host and port of the leader master + * @throws Exception if we are unable to find the leader master + */ + public HostAndPort findLeaderMasterServer() throws Exception { + return client.findLeaderMasterServer(); + } + + /** + * Helper method to easily kill the leader master. + * + * This method is thread-safe. + * @return the host and port of the detected leader master + * @throws Exception if there is an error finding or killing the leader master. + */ + public HostAndPort killLeaderMasterServer() throws Exception { + HostAndPort hp = findLeaderMasterServer(); + miniCluster.killMasterServer(hp); + return hp; + } + + /** + * Picks at random a tablet server that serves tablets from the passed table and restarts it. + * @param table table to query for a TS to restart + * @throws Exception + */ + @SuppressWarnings("deprecation") + public void restartTabletServer(KuduTable table) throws Exception { + List tablets = table.getTabletsLocations(DEFAULT_SLEEP); + if (tablets.isEmpty()) { + fail("Table " + table.getName() + " doesn't have any tablets"); + } + + LocatedTablet tablet = tablets.get(0); + LocatedTablet.Replica replica = + tablet.getReplicas().get(randomForTSRestart.nextInt(tablet.getReplicas().size())); + HostAndPort hp = new HostAndPort(replica.getRpcHost(), replica.getRpcPort()); + miniCluster.killTabletServer(hp); + miniCluster.startTabletServer(hp); + } + + /** + * Kills a tablet server that serves the given tablet's leader and restarts it. + * @param tablet a RemoteTablet which will get its leader killed and restarted + * @return the host and port of the restarted tablet server + * @throws Exception + */ + public HostAndPort restartTabletServer(RemoteTablet tablet) throws Exception { + HostAndPort hp = findLeaderTabletServer(new LocatedTablet(tablet)); + miniCluster.killTabletServer(hp); + miniCluster.startTabletServer(hp); + return hp; + } + + /** + * Set a run-time flag for a tablet server identified by its host and port. 
+ * @param hp HostAndPort object identifying the target tablet server + * @param flag a flag to set (prefix dash(es) omitted) + * @param value a stringified representation of the flag's value to set + * @throws IOException + */ + public void setTabletServerFlag(HostAndPort hp, String flag, String value) throws IOException { + miniCluster.setTServerFlag(hp, flag, value); + } + + /** + * Kills and starts back a tablet server that serves the given tablet's leader. + * @param tablet a LocatedTablet which is hosted by the target tablet server + * @return the host and port of the restarted tablet server + * @throws Exception + */ + public HostAndPort restartTabletLeader(LocatedTablet tablet) throws Exception { + HostAndPort hp = findLeaderTabletServer(tablet); + miniCluster.killTabletServer(hp); + miniCluster.startTabletServer(hp); + return hp; + } + + /** + * Kills and restarts the leader master. + * @return the host and port of the restarted master + * @throws Exception + */ + public HostAndPort restartLeaderMaster() throws Exception { + HostAndPort hp = findLeaderMasterServer(); + miniCluster.killMasterServer(hp); + miniCluster.startMasterServer(hp); + return hp; + } + + /** + * Start master which has previously been registered at the specified + * host and port. + * + * @param hp host and port of the master to start back + * @throws Exception + */ + public void startMaster(HostAndPort hp) throws Exception { + miniCluster.startMasterServer(hp); + } + + /** + * Finds and pauses the leader master. + * @return the host and port of the paused master + * @throws Exception + */ + public HostAndPort pauseLeaderMaster() throws Exception { + HostAndPort hp = findLeaderMasterServer(); + miniCluster.pauseMasterServer(hp); + return hp; + } + + /** + * Pauses the specified master. + * @throws Exception + */ + public void pauseMaster(HostAndPort hp) throws Exception { + miniCluster.pauseMasterServer(hp); + } + + /** + * Resumes the specified master. + * @throws Exception + */ + public void resumeMaster(HostAndPort hp) throws Exception { + miniCluster.resumeMasterServer(hp); + } + + /** + * Set a run-time flag for a Kudu master identified by its host and port. + * @param hp HostAndPort object identifying the target master + * @param flag a flag to set (prefix dash(es) omitted) + * @param value a stringified representation of the flag's value to set + * @throws IOException + */ + public void setMasterFlag(HostAndPort hp, String flag, String value) throws IOException { + miniCluster.setMasterFlag(hp, flag, value); + } + + /** + * Return the comma-separated list of "host:port" pairs that describes the master + * config for this cluster. + * @return The master config string. + */ + public String getMasterAddressesAsString() { + return miniCluster.getMasterAddressesAsString(); + } + + /** + * @return the list of master servers + */ + public List getMasterServers() { + return miniCluster.getMasterServers(); + } + + /** + * @return the list of tablet servers + */ + public List getTabletServers() { + return miniCluster.getTabletServers(); + } + + /** + * @return path to the mini cluster root directory + */ + public String getClusterRoot() { + return miniCluster.getClusterRoot(); + } + + /** + * @return the service principal name + */ + public String getPrincipal() { + return miniCluster.getPrincipal(); + } + + /** + * Kills all the master servers. + * Does nothing to the servers that are already dead. 
+ * + * @throws IOException + */ + public void killAllMasterServers() throws IOException { + miniCluster.killAllMasterServers(); + } + + /** + * Starts all the master servers. + * Does nothing to the servers that are already running. + * + * @throws IOException + */ + public void startAllMasterServers() throws IOException { + miniCluster.startAllMasterServers(); + } + + /** + * Kills all the tablet servers. + * Does nothing to the servers that are already dead. + * + * @throws IOException + */ + public void killAllTabletServers() throws IOException { + miniCluster.killAllTabletServers(); + } + + /** + * Starts all the tablet servers. + * Does nothing to the servers that are already running. + * + * @throws IOException + */ + public void startAllTabletServers() throws IOException { + miniCluster.startAllTabletServers(); + } + + /** + * Removes all credentials for all principals from the Kerberos credential cache. + */ + public void kdestroy() throws IOException { + miniCluster.kdestroy(); + } + + /** + * Re-initialize Kerberos credentials for the given username, writing them + * into the Kerberos credential cache. + * @param username the username to kinit as + */ + public void kinit(String username) throws IOException { + miniCluster.kinit(username); + } + + /** + * Resets the clients so that their state is completely fresh, including meta + * cache, connections, open tables, sessions and scanners, and propagated timestamp. + */ + public void resetClients() throws IOException { + client.shutdown(); + asyncClient = new AsyncKuduClientBuilder(miniCluster.getMasterAddressesAsString()) + .defaultAdminOperationTimeoutMs(DEFAULT_SLEEP) + .build(); + client = asyncClient.syncClient(); + } + + public String createJwtFor(String accountId, String subject, boolean isValid) throws IOException { + return miniCluster.createJwtFor(accountId, subject, isValid); + } + + /** + * @return cluster's CA certificate in DER format or null if catalog manager isn't ready + */ + public byte[] getClusterCACertDer() throws IOException { + return miniCluster.getCACertDer(); + } + + /** + * An annotation that can be added to each test method to + * define additional master server flags to be used when + * creating the test cluster. + * + * ex: @MasterServerConfig(flags = { "key1=valA", "key2=valB" }) + */ + @Retention(RetentionPolicy.RUNTIME) + @Target({ElementType.METHOD}) + public @interface MasterServerConfig { + String[] flags(); + } + + /** + * An annotation that can be added to each test method to + * define additional tablet server flags to be used when + * creating the test cluster. + * + * ex: @TabletServerConfig(flags = { "key1=valA", "key2=valB" }) + */ + @Retention(RetentionPolicy.RUNTIME) + @Target({ElementType.METHOD}) + public @interface TabletServerConfig { + String[] flags(); + } + + /** + * An annotation that can be added to each test method to + * define a location mapping for the cluster. Location + * mappings are defined as a series of 'location:number' + * pairs. + * + * Note that, in many Kudu tests, multiple masters will be run, each + * on their own network interface within the same machine, and client + * connections will appear to come from the same interface as the + * master being connected to. So, for example, if there are two + * clients, three masters, and three tablet servers, nine locations + * will be assigned: each client will get a location from each + * master (from a different IP), and each tablet server will get a + * location. 
The easiest way to work around this for our simple + * Java client tests is to set the number of mappings to be something + * at least (# masters) * (# clients) + (# tablet servers) + */ + @Retention(RetentionPolicy.RUNTIME) + @Target({ElementType.METHOD}) + public @interface LocationConfig { + String[] locations(); + } + + /** + * An annotation that can be added to each test method to enable Kerberos. + * The service principal name can be configured by specifying 'principal'. + */ + @Retention(RetentionPolicy.RUNTIME) + @Target({ElementType.METHOD}) + public @interface EnableKerberos { + String principal() default "kudu"; + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/MetricTestUtils.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/MetricTestUtils.java new file mode 100644 index 0000000000..c802c4a2b2 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/MetricTestUtils.java @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test; + +import static org.junit.Assert.assertEquals; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.client.KuduMetrics; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class MetricTestUtils { + + /** + * @return the total sum of "rpc.request" metrics + */ + public static long totalRequestCount() { + return (long) KuduMetrics.totalCount(KuduMetrics.RPC_REQUESTS_METRIC); + } + + /** + * Validates that the count change in the matching "rpc.request" metrics matches + * the expectedCount when the callable f is called. + * + * @param expectedCount the expected count + * @param clientId the clientId to filter on + * @param f the callable to call and validate + * @param the return type + * @return the return value from f + * @throws Exception when f throws an exception + */ + public static T validateRequestCount(int expectedCount, String clientId, + Callable f) throws Exception { + return validateRequestCount(expectedCount, clientId, Collections.emptyList(), f); + } + + /** + * Validates that the count change in the matching "rpc.request" metrics matches + * the expectedCount when the callable f is called. 
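+ *
+ * For example, a hypothetical sketch in which the client id, RPC method name,
+ * and expected count are illustrative placeholders rather than verified values:
+ *
+ *   KuduTable table = MetricTestUtils.validateRequestCount(1, clientId,
+ *       "SomeRpcMethod", () -> client.openTable("some_table"));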
+ * + * @param expectedCount the expected count + * @param clientId the clientId to filter on + * @param rpcMethodName the rpc method name to filter on + * @param f the callable to call and validate + * @param the return type + * @return the return value from f + * @throws Exception when f throws an exception + */ + public static T validateRequestCount(int expectedCount, String clientId, + String rpcMethodName, Callable f) throws Exception { + return validateRequestCount(expectedCount, clientId, + Collections.singletonList(rpcMethodName), f); + } + + /** + * Validates that the count change in the matching "rpc.request" metrics matches + * the expectedCount when the callable f is called. + * + * @param expectedCount the expected count + * @param clientId the clientId to filter on + * @param rpcMethodNames the rpc method names to filter on + * @param f the callable to call and validate + * @param the return type + * @return the return value from f + * @throws Exception when f throws an exception + */ + public static T validateRequestCount(int expectedCount, String clientId, + List rpcMethodNames, Callable f) + throws Exception { + Map beforeMap = new HashMap<>(); + for (String rpcMethodName : rpcMethodNames) { + beforeMap.put(rpcMethodName, + (long) KuduMetrics.totalCount(KuduMetrics.RPC_REQUESTS_METRIC, + KuduMetrics.CLIENT_ID_TAG, clientId, KuduMetrics.METHOD_NAME_TAG, rpcMethodName)); + } + T t = f.call(); + long count = 0; + for (Map.Entry entry : beforeMap.entrySet()) { + String rpcMethodName = entry.getKey(); + long before = entry.getValue(); + long after = (long) KuduMetrics.totalCount(KuduMetrics.RPC_REQUESTS_METRIC, + KuduMetrics.CLIENT_ID_TAG, clientId, KuduMetrics.METHOD_NAME_TAG, rpcMethodName); + count += after - before; + } + assertEquals(expectedCount, count); + return t; + } + +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/ProtobufUtils.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/ProtobufUtils.java new file mode 100644 index 0000000000..93a79a7916 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/ProtobufUtils.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test; + +import com.google.protobuf.ByteString; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +import org.apache.kudu.Common; +import org.apache.kudu.consensus.Metadata; +import org.apache.kudu.master.Master; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class ProtobufUtils { + + /** + * Get a PartitionPB with empty start and end keys. 
+ * @return a fake partition + */ + public static Common.PartitionPB.Builder getFakePartitionPB() { + Common.PartitionPB.Builder partition = Common.PartitionPB.newBuilder(); + partition.setPartitionKeyStart(ByteString.EMPTY); + partition.setPartitionKeyEnd(ByteString.EMPTY); + return partition; + } + + /** + * Get a PartitionPB with specified start and end keys. + * @param partitionKeyStart start key + * @param partitionKeyEnd end key + * @return a fake partition + */ + public static Common.PartitionPB.Builder getFakePartitionPB( + byte[] partitionKeyStart, byte[] partitionKeyEnd) { + Common.PartitionPB.Builder partition = Common.PartitionPB.newBuilder(); + partition.setPartitionKeyStart(ByteString.copyFrom(partitionKeyStart)); + partition.setPartitionKeyEnd(ByteString.copyFrom(partitionKeyEnd)); + return partition; + } + + /** + * Create a InternedReplicaPB based on the passed information. + * @param tsInfoIndex server's index in the TSInfoPB list + * @param role server's role in the configuration + * @return a fake InternedReplicaPB + */ + public static Master.TabletLocationsPB.InternedReplicaPB.Builder getFakeTabletInternedReplicaPB( + int tsInfoIndex, Metadata.RaftPeerPB.Role role) { + Master.TabletLocationsPB.InternedReplicaPB.Builder internedReplicaBuilder = + Master.TabletLocationsPB.InternedReplicaPB.newBuilder(); + internedReplicaBuilder.setTsInfoIdx(tsInfoIndex); + internedReplicaBuilder.setRole(role); + return internedReplicaBuilder; + } + + /** + * Create a TSInfoPB based on the passed information. + * @param uuid server's identifier + * @param host server's hostname + * @param port server's port + * @return a fake TSInfoPB + */ + public static Master.TSInfoPB.Builder getFakeTSInfoPB(String uuid, String host, int port) { + Master.TSInfoPB.Builder tsInfoBuilder = Master.TSInfoPB.newBuilder(); + Common.HostPortPB.Builder hostBuilder = Common.HostPortPB.newBuilder(); + hostBuilder.setHost(host); + hostBuilder.setPort(port); + tsInfoBuilder.addRpcAddresses(hostBuilder); + tsInfoBuilder.setPermanentUuid(ByteString.copyFromUtf8(uuid)); + return tsInfoBuilder; + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/RandomUtils.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/RandomUtils.java new file mode 100644 index 0000000000..48885392f2 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/RandomUtils.java @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
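A short sketch of how these builders might be combined when faking master responses in a unit test; the UUID, hostname, and port are arbitrary illustrative values.

[source,java]
----
import org.apache.kudu.Common;
import org.apache.kudu.consensus.Metadata;
import org.apache.kudu.master.Master;
import org.apache.kudu.test.ProtobufUtils;

public class ExampleProtobufUtilsUsage {
  static void buildFakeLocationPieces() {
    // A fake tablet server, a LEADER replica pointing at it by index, and an
    // unbounded partition, ready to be assembled into a tablet location.
    Master.TSInfoPB tsInfo =
        ProtobufUtils.getFakeTSInfoPB("fake-uuid", "host-0", 7050).build();
    Master.TabletLocationsPB.InternedReplicaPB replica =
        ProtobufUtils.getFakeTabletInternedReplicaPB(0, Metadata.RaftPeerPB.Role.LEADER).build();
    Common.PartitionPB partition = ProtobufUtils.getFakePartitionPB().build();
  }
}
----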
+ +package org.apache.kudu.test; + +import java.util.Random; + +import com.google.common.base.Preconditions; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class RandomUtils { + private static final Logger LOG = LoggerFactory.getLogger(RandomUtils.class); + + private static final String TEST_RANDOM_SEED_PROP = "testRandomSeed"; + + /** + * Get an instance of Random for use in tests and logs the seed used. + * + * Uses a default seed of System.currentTimeMillis() with the option to + * override via the testRandomSeed system property. + */ + public static Random getRandom() { + // First check the system property. + long seed = System.currentTimeMillis(); + if (System.getProperty(TEST_RANDOM_SEED_PROP) != null) { + seed = Long.parseLong(System.getProperty(TEST_RANDOM_SEED_PROP)); + LOG.info("System property {} is defined. Overriding random seed: {}", + TEST_RANDOM_SEED_PROP, seed); + } + LOG.info("Using random seed: {}", seed); + return new Random(seed); + } + + /* + * Return the next pseudorandom integer generated by 'random' in the range [start, end). + * 'start' must be strictly less than 'end'. + */ + public static int nextIntInRange(Random random, int start, int end) { + Preconditions.checkArgument( + start < end, + String.format("start must be strictly less than end (%d < %d)", start, end)); + return start + random.nextInt(end - start); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/TempDirUtils.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/TempDirUtils.java new file mode 100644 index 0000000000..9524dea7d7 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/TempDirUtils.java @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.apache.commons.io.FileUtils; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utilities for retrieving and creating temp directories. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class TempDirUtils { + + /** + * An enum to control whether a temporary directory created by + * {@link #makeTempDirectory(String, DeleteOnExit)} is recursively deleted on JVM exit, + * including the contents of the directory. + */ + public enum DeleteOnExit { + /** Do not delete the directory on exit. 
*/ + NO_DELETE_ON_EXIT, + /** Recursively delete the directory and its contents on exit. */ + DELETE_RECURSIVELY_ON_EXIT, + } + + private static final Logger LOG = LoggerFactory.getLogger(TempDirUtils.class); + + /** + * Match the C++ MiniCluster test functionality for overriding the tmp directory used. + * See MakeClusterRoot in src/kudu/tools/tool_action_test.cc. + * If the TEST_TMPDIR environment variable is defined that directory will be used + * instead of the default temp directory. + * + * @param prefix a directory name to be created, in environment variable TEST_TMPDIR if defined, + * else within the java.io.tmpdir system property + * @param deleteRecursivelyOnExit whether to recursively delete the directory and all its + * contents on JVM exit + * @return temp directory as a file + * @throws IOException if a temp directory cannot be created + */ + public static File makeTempDirectory(String prefix, DeleteOnExit deleteRecursivelyOnExit) + throws IOException { + String testTmpdir = System.getenv("TEST_TMPDIR"); + File newDir; + if (testTmpdir != null) { + LOG.info("Using the temp directory defined by TEST_TMPDIR: " + testTmpdir); + newDir = Files.createTempDirectory(Paths.get(testTmpdir), prefix).toFile(); + } else { + newDir = Files.createTempDirectory(prefix).toFile(); + } + if (deleteRecursivelyOnExit == DeleteOnExit.DELETE_RECURSIVELY_ON_EXIT) { + registerToRecursivelyDeleteOnShutdown(newDir.toPath()); + } + return newDir; + } + + /** + * Register a JVM shutdown hook to recursively delete the specified directory on JVM shutdown. + * @param path directory to delete on shutdown + */ + private static void registerToRecursivelyDeleteOnShutdown(Path path) { + final Path absPath = path.toAbsolutePath(); + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { + File dir = absPath.toFile(); + if (!dir.exists()) { + return; + } + try { + FileUtils.deleteDirectory(dir); + } catch (IOException exc) { + LOG.warn("Unable to remove directory tree " + absPath.toString(), exc); + } + } + }); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/FakeDNS.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/FakeDNS.java new file mode 100644 index 0000000000..663c3e7386 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/FakeDNS.java @@ -0,0 +1,196 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
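// Illustrative sketch, not part of this patch: one way a test might combine the
// RandomUtils and TempDirUtils helpers defined above. The class name and the
// directory prefix used here are hypothetical.
import java.io.File;
import java.util.Random;

import org.apache.kudu.test.RandomUtils;
import org.apache.kudu.test.TempDirUtils;

public class ExampleTempDirUsage {
  public static void main(String[] args) throws Exception {
    // The seed is logged and can be overridden with -DtestRandomSeed=<seed>.
    Random random = RandomUtils.getRandom();
    // Created under TEST_TMPDIR if that environment variable is set, otherwise
    // under java.io.tmpdir; deleted recursively when the JVM exits.
    File dir = TempDirUtils.makeTempDirectory(
        "example-" + RandomUtils.nextIntInRange(random, 0, 1000),
        TempDirUtils.DeleteOnExit.DELETE_RECURSIVELY_ON_EXIT);
    System.out.println("Created temp dir: " + dir.getAbsolutePath());
  }
}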
+ +package org.apache.kudu.test.cluster; + +import java.lang.reflect.Field; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import javax.annotation.concurrent.GuardedBy; + +import com.google.common.base.Throwables; +import com.google.common.net.InetAddresses; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Fake DNS resolver which allows our tests to work well even though we use + * strange loopback IP addresses (127.x.y.z) with no corresponding reverse + * DNS. + * + * This overrides the reverse lookups for such IPs to return the same address + * in String form. + * + * Without this class, reverse DNS lookups for such addresses often take + * 5 seconds to return, causing timeouts and overall test slowness. + * + * In the future this class might also be extended to test more interesting + * DNS-related scenarios. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class FakeDNS { + static FakeDNS instance = new FakeDNS(); + + @GuardedBy("this") + private Map forwardResolutions = new HashMap<>(); + + @GuardedBy("this") + private Map reverseResolutions = new HashMap<>(); + + /** whether the fake resolver has been installed */ + @GuardedBy("this") + private boolean installed = false; + + private FakeDNS() { + } + + public static FakeDNS getInstance() { + return instance; + } + + public synchronized void addForwardResolution(String hostname, InetAddress ip) { + forwardResolutions.put(hostname, ip); + } + + public synchronized void addReverseResolution(InetAddress ip, String hostname) { + reverseResolutions.put(ip, hostname); + } + + /** + * Install the fake DNS resolver into the Java runtime. + */ + public synchronized void install() { + if (installed) { + return; + } + try { + try { + // Override the NameService in Java 9 or later. + final Class nameServiceInterface = Class.forName("java.net.InetAddress$NameService"); + Field field = InetAddress.class.getDeclaredField("nameService"); + // Get the default NameService to fallback to. + Method method = InetAddress.class.getDeclaredMethod("createNameService"); + method.setAccessible(true); + Object fallbackNameService = method.invoke(null); + // Create a proxy instance to set on the InetAddress field which will handle + // all NameService calls. + Object proxy = Proxy.newProxyInstance(nameServiceInterface.getClassLoader(), + new Class[]{nameServiceInterface}, new NameServiceListener(fallbackNameService)); + field.setAccessible(true); + field.set(InetAddress.class, proxy); + } catch (final ClassNotFoundException | NoSuchFieldException e) { + // Override the NameService in Java 8 or earlier. + final Class nameServiceInterface = Class.forName("sun.net.spi.nameservice.NameService"); + Field field = InetAddress.class.getDeclaredField("nameServices"); + // Get the default NameService to fallback to. + Method method = InetAddress.class.getDeclaredMethod("createNSProvider", String.class); + method.setAccessible(true); + Object fallbackNameService = method.invoke(null, "default"); + // Create a proxy instance to set on the InetAddress field which will handle + // all NameService calls. 
+ Object proxy = Proxy.newProxyInstance(nameServiceInterface.getClassLoader(), + new Class[]{nameServiceInterface}, new NameServiceListener(fallbackNameService)); + field.setAccessible(true); + // Java 8 or earlier takes a list of NameServices + field.set(InetAddress.class, Arrays.asList(proxy)); + } + } catch (ReflectiveOperationException e) { + throw new RuntimeException(e); + } + installed = true; + } + + /** + * The NameService in all versions of Java has the same interface, so we + * can use the same InvocationHandler as our proxy instance for both + * java.net.InetAddress$NameService and sun.net.spi.nameservice.NameService. + */ + private class NameServiceListener implements InvocationHandler { + + private final Object fallbackNameService; + + // Creates a NameServiceListener with a NameService implementation to + // fallback to. The parameter is untyped so we can handle the NameService + // type in all versions of Java with reflection. + NameServiceListener(Object fallbackNameService) { + this.fallbackNameService = fallbackNameService; + } + + private InetAddress[] lookupAllHostAddr(String host) throws UnknownHostException { + InetAddress inetAddress; + synchronized (FakeDNS.this) { + inetAddress = forwardResolutions.get(host); + } + if (inetAddress != null) { + return new InetAddress[]{inetAddress}; + } + + try { + Method method = fallbackNameService.getClass() + .getDeclaredMethod("lookupAllHostAddr", String.class); + method.setAccessible(true); + return (InetAddress[]) method.invoke(fallbackNameService, host); + } catch (ReflectiveOperationException e) { + Throwables.propagateIfPossible(e.getCause(), UnknownHostException.class); + throw new AssertionError("unexpected reflection issue", e); + } + } + + private String getHostByAddr(byte[] addr) throws UnknownHostException { + if (addr[0] == 127) { + return InetAddresses.toAddrString(InetAddress.getByAddress(addr)); + } + + String hostname; + synchronized (FakeDNS.this) { + hostname = reverseResolutions.get(InetAddress.getByAddress(addr)); + } + if (hostname != null) { + return hostname; + } + + try { + Method method = fallbackNameService.getClass() + .getDeclaredMethod("getHostByAddr", byte[].class); + method.setAccessible(true); + return (String) method.invoke(fallbackNameService, (Object) addr); + } catch (ReflectiveOperationException e) { + Throwables.propagateIfPossible(e.getCause(), UnknownHostException.class); + throw new AssertionError("unexpected reflection issue", e); + } + } + + @Override + public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { + switch (method.getName()) { + case "lookupAllHostAddr": + return lookupAllHostAddr((String) args[0]); + case "getHostByAddr": + return getHostByAddr((byte[]) args[0]); + default: + throw new UnsupportedOperationException(); + } + } + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryInfo.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryInfo.java new file mode 100644 index 0000000000..f65cd31c7a --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryInfo.java @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
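// Illustrative sketch, not part of this patch: installing the FakeDNS resolver
// defined above in a test so that 127.x.y.z loopback addresses resolve without
// hitting real DNS. The host name used here is hypothetical.
import java.net.InetAddress;

import org.apache.kudu.test.cluster.FakeDNS;

public class ExampleFakeDnsUsage {
  public static void main(String[] args) throws Exception {
    FakeDNS fakeDns = FakeDNS.getInstance();
    // Installing the resolver more than once is a no-op.
    fakeDns.install();
    // Map a fake host name to a loopback address and back again.
    InetAddress ip = InetAddress.getByName("127.1.2.3");
    fakeDns.addForwardResolution("fake-master.example.com", ip);
    fakeDns.addReverseResolution(ip, "fake-master.example.com");
    System.out.println(InetAddress.getByName("fake-master.example.com"));
  }
}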
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test.cluster; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +/** + * Simple struct to provide various properties of a binary artifact to callers. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class KuduBinaryInfo { + private final String binDir; + private final String saslDir; + + public KuduBinaryInfo(String binDir, String saslDir) { + this.binDir = binDir; + this.saslDir = saslDir; + } + + public KuduBinaryInfo(String binDir) { + this(binDir, null); + } + + /** + * Return the binary directory of an extracted artifact. + */ + public String getBinDir() { + return binDir; + } + + /** + * Return the SASL module directory of an extracted artifact. + * May be {@code null} if unknown. + */ + public String getSaslDir() { + return saslDir; + } + + /** + * The C++ sanitizer type enabled for the kudu CLI binary. + */ + public enum SanitizerType { + NONE, + ASAN, + TSAN, + } + + /** + * @return sanitizer type for the kudu CLI binary. + */ + public static SanitizerType getSanitizerType() { + List vs = getBinaryVersionStrings(); + if (vs.size() < 1 || !vs.get(0).startsWith("kudu ")) { + throw new RuntimeException(String.format( + "unexpected version output from kudu binary: %s", + Joiner.on("\n").join(vs))); + } + for (String s : vs) { + if (s.equals("ASAN enabled")) { + return SanitizerType.ASAN; + } else if (s.equals("TSAN enabled")) { + return SanitizerType.TSAN; + } + } + return SanitizerType.NONE; + } + + /** + * @return sequence of strings output by 'kudu --version' + */ + private static List getBinaryVersionStrings() { + try { + KuduBinaryLocator.ExecutableInfo exeInfo = + KuduBinaryLocator.findBinary("kudu"); + ProcessBuilder pb = new ProcessBuilder( + Lists.newArrayList(exeInfo.exePath(), "--version")); + pb.environment().putAll(exeInfo.environment()); + pb.redirectError(ProcessBuilder.Redirect.INHERIT); + final Process p = pb.start(); + List result = new ArrayList<>(); + try (InputStreamReader isr = new InputStreamReader(p.getInputStream(), UTF_8); + BufferedReader br = new BufferedReader(isr)) { + while (true) { + String line = br.readLine(); + if (line == null) { + break; + } + result.add(line); + } + } + final int exitCode = p.waitFor(); + if (exitCode != 0) { + // Don't bother reporting the contents of stderr: it should be in the + // log of the parent process due to the stderr redirection. 
+ throw new RuntimeException(String.format( + "unexpected exit code from kudu binary: %d", exitCode)); + } + return result; + } catch (IOException e) { + throw new RuntimeException( + "unexpected exception while trying to run kudu binary", e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException( + "unexpected exception while trying to run kudu binary", e); + } + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryJarExtractor.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryJarExtractor.java new file mode 100644 index 0000000000..e34ea66888 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryJarExtractor.java @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test.cluster; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.StandardCopyOption; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.PosixFilePermission; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import com.google.common.base.Preconditions; +import com.google.gradle.osdetector.OsDetector; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class to find and extract Kudu binary jars from the classpath + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class KuduBinaryJarExtractor { + + private static final Logger LOG = LoggerFactory.getLogger(KuduBinaryJarExtractor.class); + private static final String KUDU_TEST_BIN_PROPS_PATH = + "META-INF/apache-kudu-test-binary.properties"; + private static final OsDetector DETECTOR = new OsDetector(); + + /** Return the thread context classloader or the parent classloader for this class. 
*/ + private static ClassLoader getCurrentClassLoader() { + ClassLoader loader = Thread.currentThread().getContextClassLoader(); + if (loader != null) { + return loader; + } + return KuduBinaryJarExtractor.class.getClassLoader(); + } + + private static Properties getBinaryProps() throws IOException { + Enumeration resources = getCurrentClassLoader().getResources(KUDU_TEST_BIN_PROPS_PATH); + while (resources.hasMoreElements()) { + URL url = resources.nextElement(); + try { + Properties props = loadBinaryProps(url); + if (DETECTOR.getOs().equals(props.getProperty("artifact.os")) && + DETECTOR.getArch().equals(props.getProperty("artifact.arch"))) { + return props; + } + } catch (IOException ex) { + LOG.warn("Unable to parse properties file from Kudu binary artifact", ex); + } + } + return null; + } + + private static Properties loadBinaryProps(URL url) throws IOException { + Properties props = new Properties(); + props.load(url.openStream()); + return props; + } + + /** + * Determine if the classpath has a Kudu binary test jar compatible with the system architecture + * and operating system. + * If a Thread context ClassLoader is set, then that ClassLoader is searched. + * Otherwise, the ClassLoader that loaded this class is searched. + * + *
TODO: at the time of writing, OS and architecture checks are not yet implemented. + * + * @return {@code true} if an appropriate Kudu binary jar is available, {@code false} otherwise + */ + public boolean isKuduBinaryJarOnClasspath() throws IOException { + Properties binaryProps = getBinaryProps(); + return binaryProps != null; + } + + /** + * Extract the Kudu binary test jar found on the classpath to the specified location. + * If a Thread context ClassLoader is set, then that ClassLoader is searched. + * Otherwise, the ClassLoader that loaded this class is searched. + * + *
It is expected that + * {@link #isKuduBinaryJarOnClasspath()} should return {@code true} before this method is invoked. + * + * @param destDir path to a destination + * @return information about the extracted artifact + * @throws FileNotFoundException if the binary JAR cannot not be located + * @throws IOException if the JAR extraction process fails + */ + public KuduBinaryInfo extractKuduBinaryArtifact(String destDir) throws IOException { + Properties binaryProps = getBinaryProps(); + if (binaryProps == null) { + throw new FileNotFoundException("Could not locate the Kudu binary test jar"); + } + + String prefix = binaryProps.getProperty("artifact.prefix"); + URL artifactPrefix = getCurrentClassLoader().getResource(prefix); + if (artifactPrefix == null) { + throw new FileNotFoundException("Cannot find Kudu artifact prefix dir: " + prefix); + } + + try { + Path artifactRoot = extractJar(artifactPrefix.toURI(), prefix, Paths.get(destDir)); + Path binDir = Paths.get(artifactRoot.toString(), "bin"); + if (!binDir.toFile().exists()) { + throw new FileNotFoundException("Cannot find Kudu artifact bin dir: " + binDir.toString()); + } + + // Only set the saslDir property if we find it in the artifact, since that affects whether + // the caller needs to set SASL_PATH when executing the binaries. + Path saslDir = Paths.get(artifactRoot.toString(), "lib", "sasl2"); + String saslDirString = null; + if (saslDir.toFile().exists()) { + saslDirString = saslDir.toAbsolutePath().toString(); + } + + return new KuduBinaryInfo(binDir.toString(), saslDirString); + } catch (URISyntaxException e) { + throw new IOException("Cannot unpack Kudu binary jar", e); + } + } + + /** + * Extracts the given prefix of the given jar into the target directory. + * Accessible for testing only. 
+ * @param src URI of the source jar + * @param prefix prefix of the jar to extract into the destination directory + * @param target destination directory + * @return an absolute path to the extracted artifact, including the prefix portion + */ + static Path extractJar(URI src, String prefix, final Path target) throws IOException { + Preconditions.checkArgument("jar".equals(src.getScheme()), "src URI must use a 'jar' scheme"); + if (Files.notExists(target)) { + Files.createDirectory(target); + } + + Map env = new HashMap<>(); + try (FileSystem zipFileSystem = FileSystems.newFileSystem(src, env)) { + + Path root = zipFileSystem.getPath(prefix); + Files.walkFileTree(root, new SimpleFileVisitor() { + + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attributes) + throws IOException { + Path newDir = Paths.get(target.toString(), dir.toString()); + Files.copy(dir, newDir, StandardCopyOption.REPLACE_EXISTING); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) + throws IOException { + Path newFile = Paths.get(target.toString(), file.toString()); + Files.copy(file, newFile, StandardCopyOption.REPLACE_EXISTING); + + Path parent = file.getParent(); + if (parent != null && parent.endsWith("bin")) { + Set perms = Files.getPosixFilePermissions(newFile); + perms.add(PosixFilePermission.OWNER_EXECUTE); + Files.setPosixFilePermissions(newFile, perms); + } + return FileVisitResult.CONTINUE; + } + }); + } + return Paths.get(target.toString(), prefix).toAbsolutePath(); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryLocator.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryLocator.java new file mode 100644 index 0000000000..9627e6e70f --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/KuduBinaryLocator.java @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
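// Illustrative sketch, not part of this patch: checking for and extracting a Kudu
// binary test jar from the classpath using the extractor defined above. The
// destination directory is hypothetical.
import org.apache.kudu.test.cluster.KuduBinaryInfo;
import org.apache.kudu.test.cluster.KuduBinaryJarExtractor;

public class ExampleBinaryJarExtraction {
  public static void main(String[] args) throws Exception {
    KuduBinaryJarExtractor extractor = new KuduBinaryJarExtractor();
    if (!extractor.isKuduBinaryJarOnClasspath()) {
      System.out.println("No compatible Kudu binary test jar on the classpath");
      return;
    }
    KuduBinaryInfo info = extractor.extractKuduBinaryArtifact("/tmp/kudu-binaries");
    System.out.println("bin dir:  " + info.getBinDir());
    // May be null when the artifact does not bundle SASL modules.
    System.out.println("sasl dir: " + info.getSaslDir());
  }
}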
+ +package org.apache.kudu.test.cluster; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.HashMap; +import java.util.Map; + +import com.google.common.base.Preconditions; +import com.google.common.io.CharStreams; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.test.TempDirUtils; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class KuduBinaryLocator { + + private static final String SASL_PATH_NAME = "SASL_PATH"; + private static final String KUDU_BIN_DIR_PROP = "kuduBinDir"; + private static final Logger LOG = LoggerFactory.getLogger(KuduBinaryLocator.class); + + @InterfaceAudience.Private + @InterfaceStability.Unstable + public static class ExecutableInfo { + private final String exePath; + private final Map env; + + public ExecutableInfo(String exePath, Map env) { + Preconditions.checkNotNull(exePath); + Preconditions.checkNotNull(env); + this.exePath = exePath; + this.env = env; + } + + /** Path to the executable. */ + public String exePath() { + return exePath; + } + + /** Any environment variables that should be set when running the executable. */ + public Map environment() { + return env; + } + } + + /** + * Find the binary directory within the build tree. + *
+ * Uses the following priority: + * - If kuduBinDir system property is set, use that. + * - If the `kudu` binary is found on the PATH using `which kudu`, + * use its parent directory. + */ + private static KuduBinaryInfo findBinaryLocation() { + // If kuduBinDir system property is set, use that. + String kuduBinDirProp = System.getProperty(KUDU_BIN_DIR_PROP); + if (kuduBinDirProp != null) { + LOG.info("Using Kudu binary directory specified by system property '{}': {}", + KUDU_BIN_DIR_PROP, kuduBinDirProp); + return new KuduBinaryInfo(kuduBinDirProp); + } + + try { + KuduBinaryJarExtractor extractor = new KuduBinaryJarExtractor(); + if (extractor.isKuduBinaryJarOnClasspath()) { + File testTmpDir = TempDirUtils.makeTempDirectory("kudu-binary-jar", + TempDirUtils.DeleteOnExit.DELETE_RECURSIVELY_ON_EXIT); + LOG.info("Using Kudu binary jar directory: {}", testTmpDir.getAbsolutePath()); + return extractor.extractKuduBinaryArtifact(testTmpDir.getAbsolutePath()); + } + } catch (IOException ex) { + LOG.warn("Unable to extract a Kudu binary jar", ex); + } + + // If the `kudu` binary is found on the PATH using `which kudu`, use its parent directory. + try { + Runtime runtime = Runtime.getRuntime(); + Process process = runtime.exec("which kudu"); + int errorCode = process.waitFor(); + if (errorCode == 0) { + try (Reader reader = new InputStreamReader(process.getInputStream(), UTF_8)) { + String kuduBinary = CharStreams.toString(reader); + String kuduBinDir = new File(kuduBinary).getParent(); + LOG.info("Using Kudu binary directory found on path with 'which kudu': {}", kuduBinDir); + return new KuduBinaryInfo(kuduBinDir); + } + } + } catch (IOException | InterruptedException ex) { + throw new RuntimeException("Error while locating kudu binary", ex); + } + + throw new RuntimeException("Set the system variable " + KUDU_BIN_DIR_PROP + " or add the Kudu" + + " binary test jar to your classpath or ensure the `kudu` binary is on your path."); + } + + /** + * @param exeName the binary to look for (eg 'kudu-tserver') + * @return the absolute path of that binary + * @throws FileNotFoundException if no such binary is found + */ + public static ExecutableInfo findBinary(String exeName) throws FileNotFoundException { + KuduBinaryInfo artifactInfo = findBinaryLocation(); + + File executable = new File(artifactInfo.getBinDir(), exeName); + if (!executable.exists() || !executable.canExecute()) { + throw new FileNotFoundException("Cannot find executable " + exeName + + " in binary directory " + artifactInfo.getBinDir()); + } + + Map env = new HashMap<>(); + if (artifactInfo.getSaslDir() != null) { + env.put(SASL_PATH_NAME, artifactInfo.getSaslDir()); + } + + return new ExecutableInfo(executable.getAbsolutePath(), env); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/MiniKuduCluster.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/MiniKuduCluster.java new file mode 100644 index 0000000000..ce51491b2c --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/cluster/MiniKuduCluster.java @@ -0,0 +1,892 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
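// Illustrative sketch, not part of this patch: resolving the kudu-tserver binary
// through the locator defined above and running it with the environment the
// locator reports (for example SASL_PATH when binaries come from the test jar).
import org.apache.kudu.test.cluster.KuduBinaryLocator;

public class ExampleBinaryLookup {
  public static void main(String[] args) throws Exception {
    KuduBinaryLocator.ExecutableInfo exe = KuduBinaryLocator.findBinary("kudu-tserver");
    ProcessBuilder pb = new ProcessBuilder(exe.exePath(), "--version");
    // Propagate any environment variables the executable needs.
    pb.environment().putAll(exe.environment());
    pb.inheritIO();
    int exitCode = pb.start().waitFor();
    System.out.println("kudu-tserver exited with code " + exitCode);
  }
}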
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test.cluster; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.file.Paths; +import java.security.Security; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.Common; +import org.apache.kudu.client.HostAndPort; +import org.apache.kudu.client.ProtobufHelper; +import org.apache.kudu.test.KuduTestHarness; +import org.apache.kudu.test.TempDirUtils; +import org.apache.kudu.tools.Tool.ControlShellRequestPB; +import org.apache.kudu.tools.Tool.ControlShellResponsePB; +import org.apache.kudu.tools.Tool.CreateClusterRequestPB; +import org.apache.kudu.tools.Tool.CreateClusterRequestPB.JwksOptionsPB; +import org.apache.kudu.tools.Tool.CreateClusterRequestPB.MiniKdcOptionsPB; +import org.apache.kudu.tools.Tool.CreateClusterRequestPB.MiniOidcOptionsPB; +import org.apache.kudu.tools.Tool.CreateJwtRequestPB; +import org.apache.kudu.tools.Tool.DaemonIdentifierPB; +import org.apache.kudu.tools.Tool.DaemonInfoPB; +import org.apache.kudu.tools.Tool.GetKDCEnvVarsRequestPB; +import org.apache.kudu.tools.Tool.GetMastersRequestPB; +import org.apache.kudu.tools.Tool.GetTServersRequestPB; +import org.apache.kudu.tools.Tool.KdestroyRequestPB; +import org.apache.kudu.tools.Tool.KinitRequestPB; +import org.apache.kudu.tools.Tool.PauseDaemonRequestPB; +import org.apache.kudu.tools.Tool.ResumeDaemonRequestPB; +import org.apache.kudu.tools.Tool.SetDaemonFlagRequestPB; +import org.apache.kudu.tools.Tool.StartClusterRequestPB; +import org.apache.kudu.tools.Tool.StartDaemonRequestPB; +import org.apache.kudu.tools.Tool.StopDaemonRequestPB; +import org.apache.kudu.util.SecurityUtil; + +/** + * Utility class to start and manipulate Kudu clusters. Depends on precompiled + * kudu, kudu-master, and kudu-tserver binaries. {@link KuduTestHarness} + * should be used instead of directly using this class in almost all cases. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class MiniKuduCluster implements AutoCloseable { + + private static final Logger LOG = LoggerFactory.getLogger(MiniKuduCluster.class); + + // Control shell process. + private Process miniCluster; + + // Request channel to the control shell. 
+ private DataOutputStream miniClusterStdin; + + // Response channel from the control shell. + private DataInputStream miniClusterStdout; + + // Thread that reads and logs stderr from the control shell. + private Thread miniClusterErrorPrinter; + + private static class DaemonInfo { + DaemonIdentifierPB id; + boolean isRunning; + boolean isPaused; + String webServerAddress; + } + + // Map of master addresses to daemon information. + private final Map masterServers = Maps.newHashMap(); + + // Map of tserver addresses to daemon information. + private final Map tabletServers = Maps.newHashMap(); + + // Builder-provided cluster configuration state. + private final boolean enableKerberos; + private final int numMasters; + private final int numTservers; + private final ImmutableList extraTserverFlags; + private final ImmutableList extraMasterFlags; + private final ImmutableList locationInfo; + private final String clusterRoot; + private final String principal; + + private MiniKdcOptionsPB kdcOptionsPb; + private final Common.HmsMode hmsMode; + private MiniOidcOptionsPB oidcOptionsPb; + + private MiniKuduCluster(boolean enableKerberos, + int numMasters, + int numTservers, + List extraTserverFlags, + List extraMasterFlags, + List locationInfo, + MiniKdcOptionsPB kdcOptionsPb, + String clusterRoot, + Common.HmsMode hmsMode, + String principal, + MiniOidcOptionsPB oidcOptionsPb) { + this.enableKerberos = enableKerberos; + this.numMasters = numMasters; + this.numTservers = numTservers; + this.extraTserverFlags = ImmutableList.copyOf(extraTserverFlags); + this.extraMasterFlags = ImmutableList.copyOf(extraMasterFlags); + this.locationInfo = ImmutableList.copyOf(locationInfo); + this.kdcOptionsPb = kdcOptionsPb; + this.principal = principal; + this.hmsMode = hmsMode; + this.oidcOptionsPb = oidcOptionsPb; + + if (clusterRoot == null) { + // If a cluster root was not set, create a unique temp directory to use. + // The mini cluster will clean this directory up on exit. + try { + File tempRoot = TempDirUtils.makeTempDirectory("mini-kudu-cluster", + TempDirUtils.DeleteOnExit.NO_DELETE_ON_EXIT); + this.clusterRoot = tempRoot.toString(); + } catch (IOException ex) { + throw new RuntimeException("Could not create cluster root directory", ex); + } + } else { + this.clusterRoot = clusterRoot; + } + + // Default Java security settings are restrictive with regard to RSA key + // length. Since Kudu masters and tablet servers in MiniKuduCluster use + // smaller RSA keys to shorten runtime of tests, it's necessary to override + // those default security settings to allow for using relaxed cryptography, + // particularly smaller RSA keys. + Security.setProperty("jdk.certpath.disabledAlgorithms", "MD2, RC4, MD5"); + Security.setProperty("jdk.tls.disabledAlgorithms", "SSLv3, RC4, MD5"); + } + + /** + * Sends a command to the control shell and receives its response. + *
+ * The method is synchronized to prevent interleaving of requests and responses. + * @param req control shell request + * @return control shell response + * @throws IOException if there was some kind of transport error, or if the + * response indicates an error + */ + private synchronized ControlShellResponsePB sendRequestToCluster(ControlShellRequestPB req) + throws IOException { + // Send the request's size (4 bytes, big endian) followed by the request. + LOG.debug("Request: {}", req); + miniClusterStdin.writeInt(req.getSerializedSize()); + miniClusterStdin.write(req.toByteArray()); + miniClusterStdin.flush(); + + // Read the response's size (4 bytes, big endian) followed by the response. + int respLength = miniClusterStdout.readInt(); + byte[] respBody = new byte[respLength]; + miniClusterStdout.readFully(respBody); + ControlShellResponsePB resp = ControlShellResponsePB.parseFrom(respBody); + LOG.debug("Response: {}", resp); + + // Convert any error into an exception. + if (resp.hasError()) { + throw new IOException(resp.getError().getMessage()); + } + return resp; + } + + /** + * Starts this Kudu cluster. + * @throws IOException if something went wrong in transit + */ + private synchronized void start() throws IOException { + Preconditions.checkArgument(numMasters > 0, "Need at least one master"); + + // Start the control shell and the communication channel to it. + KuduBinaryLocator.ExecutableInfo exeInfo = KuduBinaryLocator.findBinary("kudu"); + List commandLine = Lists.newArrayList(exeInfo.exePath(), + "test", + "mini_cluster", + "--serialization=pb"); + LOG.info("Starting process: {}", commandLine); + ProcessBuilder processBuilder = new ProcessBuilder(commandLine); + processBuilder.environment().putAll(exeInfo.environment()); + + miniCluster = processBuilder.start(); + miniClusterStdin = new DataOutputStream(miniCluster.getOutputStream()); + miniClusterStdout = new DataInputStream(miniCluster.getInputStream()); + + // Set up a thread that logs stderr from the control shell; this will + // include all cluster logging. + ProcessInputStreamLogPrinterRunnable printer = + new ProcessInputStreamLogPrinterRunnable(miniCluster.getErrorStream()); + miniClusterErrorPrinter = new Thread(printer); + miniClusterErrorPrinter.setDaemon(true); + miniClusterErrorPrinter.setName("cluster stderr printer"); + miniClusterErrorPrinter.start(); + + CreateClusterRequestPB.Builder createClusterRequestBuilder = CreateClusterRequestPB.newBuilder() + .setNumMasters(numMasters) + .setNumTservers(numTservers) + .setEnableKerberos(enableKerberos) + .setHmsMode(hmsMode) + .addAllExtraMasterFlags(extraMasterFlags) + .addAllExtraTserverFlags(extraTserverFlags) + .setMiniKdcOptions(kdcOptionsPb) + .setClusterRoot(clusterRoot) + .setPrincipal(principal) + .setMiniOidcOptions(oidcOptionsPb); + + // Set up the location mapping command flag if there is location info. + if (!locationInfo.isEmpty()) { + List locationMappingCmd = new ArrayList<>(); + locationMappingCmd.add(getClass().getResource("/assign-location.py").getFile()); + String locationMappingCmdPath = + Paths.get(clusterRoot, "location-assignment.state").toString(); + locationMappingCmd.add("--state_store=" + locationMappingCmdPath); + for (String location : locationInfo) { + locationMappingCmd.add("--map " + location); + } + String locationMappingCmdFlag = "--location_mapping_cmd=" + + Joiner.on(" ").join(locationMappingCmd); + createClusterRequestBuilder.addExtraMasterFlags(locationMappingCmdFlag); + } + + // Create and start the cluster. 
+ sendRequestToCluster( + ControlShellRequestPB.newBuilder() + .setCreateCluster(createClusterRequestBuilder.build()) + .build()); + sendRequestToCluster( + ControlShellRequestPB.newBuilder() + .setStartCluster(StartClusterRequestPB.newBuilder().build()) + .build()); + + // If the cluster is Kerberized, retrieve the KDC's environment variables + // and adapt them into certain security-related system properties. + if (enableKerberos) { + ControlShellResponsePB resp = sendRequestToCluster( + ControlShellRequestPB.newBuilder() + .setGetKdcEnvVars(GetKDCEnvVarsRequestPB.newBuilder().build()) + .build()); + for (Map.Entry e : resp.getGetKdcEnvVars().getEnvVarsMap().entrySet()) { + if (e.getKey().equals("KRB5_CONFIG")) { + System.setProperty("java.security.krb5.conf", e.getValue()); + } else if (e.getKey().equals("KRB5CCNAME")) { + System.setProperty(SecurityUtil.KUDU_TICKETCACHE_PROPERTY, e.getValue()); + } + } + } + + // Initialize the maps of master and tablet servers. + ControlShellResponsePB resp = sendRequestToCluster( + ControlShellRequestPB.newBuilder() + .setGetMasters(GetMastersRequestPB.newBuilder().build()) + .build()); + for (DaemonInfoPB info : resp.getGetMasters().getMastersList()) { + DaemonInfo d = new DaemonInfo(); + d.id = info.getId(); + d.isRunning = true; + d.isPaused = false; + d.webServerAddress = String.join(":", info.getBoundHttpAddress().getHost(), + Integer.toString(info.getBoundHttpAddress().getPort())); + masterServers.put(ProtobufHelper.hostAndPortFromPB(info.getBoundRpcAddress()), d); + } + resp = sendRequestToCluster( + ControlShellRequestPB.newBuilder() + .setGetTservers(GetTServersRequestPB.newBuilder().build()) + .build()); + for (DaemonInfoPB info : resp.getGetTservers().getTserversList()) { + DaemonInfo d = new DaemonInfo(); + d.id = info.getId(); + d.isRunning = true; + d.isPaused = false; + d.webServerAddress = String.join(":", info.getBoundHttpAddress().getHost(), + Integer.toString(info.getBoundHttpAddress().getPort())); + tabletServers.put(ProtobufHelper.hostAndPortFromPB(info.getBoundRpcAddress()), d); + } + } + + /** + * @return a comma-separated list of RPC addresses of all masters in the cluster + */ + public String getMasterAddressesAsString() { + return Joiner.on(',').join(masterServers.keySet()); + } + + /** + * @return a comma-separated list of webserver addresses of all masters in the cluster + */ + public String getMasterWebServerAddressesAsString() { + List addresses = new ArrayList(); + masterServers.forEach((hp, daemonInfo) -> { + addresses.add(daemonInfo.webServerAddress); + }); + + return Joiner.on(',').join(addresses); + } + + /** + * @return the list of master servers + */ + public List getMasterServers() { + return new ArrayList<>(masterServers.keySet()); + } + + /** + * @return the list of tablet servers + */ + public List getTabletServers() { + return new ArrayList<>(tabletServers.keySet()); + } + + /** + * @return the service principal name + */ + public String getPrincipal() { + return principal; + } + + public String createJwtFor(String accountId, String subject, boolean isValid) throws IOException { + ControlShellResponsePB resp = sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setCreateJwt(CreateJwtRequestPB + .newBuilder() + .setAccountId(accountId) + .setSubject(subject) + .setIsValidKey(isValid) + .build()) + .build()); + return resp.getCreateJwt().getJwt(); + } + + /** + * Starts a master identified by a host and port. + * Does nothing if the server was already running. 
+ * + * @param hp unique host and port identifying the server + * @throws IOException if something went wrong in transit + */ + public void startMasterServer(HostAndPort hp) throws IOException { + DaemonInfo d = getMasterServer(hp); + if (d.isRunning) { + return; + } + LOG.info("Starting master server {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setStartDaemon(StartDaemonRequestPB.newBuilder().setId(d.id).build()) + .build()); + d.isRunning = true; + } + + /** + * Kills a master identified identified by an host and port. + * Does nothing if the master was already dead. + * + * @param hp unique host and port identifying the server + * @throws IOException if something went wrong in transit + */ + public void killMasterServer(HostAndPort hp) throws IOException { + DaemonInfo d = getMasterServer(hp); + if (!d.isRunning) { + return; + } + LOG.info("Killing master server {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setStopDaemon(StopDaemonRequestPB.newBuilder().setId(d.id).build()) + .build()); + d.isRunning = false; + } + + /** + * Pauses a master identified identified by the specified host and port. + * Does nothing if the master is already paused. + * + * @param hp unique host and port identifying the server + * @throws IOException if something went wrong in transit + */ + public void pauseMasterServer(HostAndPort hp) throws IOException { + DaemonInfo d = getMasterServer(hp); + if (d.isPaused) { + return; + } + LOG.info("pausing master server {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setPauseDaemon(PauseDaemonRequestPB.newBuilder().setId(d.id).build()) + .build()); + d.isPaused = true; + } + + /** + * Resumes a master identified identified by the specified host and port. + * Does nothing if the master isn't paused. + * + * @param hp unique host and port identifying the server + * @throws IOException if something went wrong in transit + */ + public void resumeMasterServer(HostAndPort hp) throws IOException { + DaemonInfo d = getMasterServer(hp); + if (!d.isPaused) { + return; + } + LOG.info("resuming master server {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setResumeDaemon(ResumeDaemonRequestPB.newBuilder().setId(d.id).build()) + .build()); + d.isPaused = false; + } + + /** + * Starts a tablet server identified by an host and port. + * Does nothing if the server was already running. + * + * @param hp unique host and port identifying the server + * @throws IOException if something went wrong in transit + */ + public void startTabletServer(HostAndPort hp) throws IOException { + DaemonInfo d = getTabletServer(hp); + if (d.isRunning) { + return; + } + LOG.info("Starting tablet server {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setStartDaemon(StartDaemonRequestPB.newBuilder().setId(d.id).build()) + .build()); + d.isRunning = true; + } + + /** + * Kills a tablet server identified by an host and port. + * Does nothing if the tablet server was already dead. 
+ * + * @param hp unique host and port identifying the server + * @throws IOException if something went wrong in transit + */ + public void killTabletServer(HostAndPort hp) throws IOException { + DaemonInfo d = getTabletServer(hp); + if (!d.isRunning) { + return; + } + LOG.info("Killing tablet server {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setStopDaemon(StopDaemonRequestPB.newBuilder().setId(d.id).build()) + .build()); + d.isRunning = false; + } + + /** + * Pauses a tablet server identified by the specified host and port. + * Does nothing if the tablet server is already paused. + * + * @param hp unique host and port identifying the server + * @throws IOException if something went wrong in transit + */ + public void pauseTabletServer(HostAndPort hp) throws IOException { + DaemonInfo d = getTabletServer(hp); + if (d.isPaused) { + return; + } + LOG.info("pausing tablet server {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setPauseDaemon(PauseDaemonRequestPB.newBuilder().setId(d.id).build()) + .build()); + d.isPaused = true; + } + + /** + * Resumes a tablet server identified by the specified host and port. + * Does nothing if the tablet server isn't paused. + * + * @param hp unique host and port identifying the server + * @throws IOException if something went wrong in transit + */ + public void resumeTabletServer(HostAndPort hp) throws IOException { + DaemonInfo d = getTabletServer(hp); + if (!d.isPaused) { + return; + } + LOG.info("resuming tablet server {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setResumeDaemon(ResumeDaemonRequestPB.newBuilder().setId(d.id).build()) + .build()); + d.isPaused = true; + } + + /** + * Kills all the master servers. + * Does nothing to the servers that are already dead. + * + * @throws IOException if something went wrong in transit + */ + public void killAllMasterServers() throws IOException { + for (Map.Entry e : masterServers.entrySet()) { + killMasterServer(e.getKey()); + } + } + + /** + * Starts all the master servers. + * Does nothing to the servers that are already running. + * + * @throws IOException if something went wrong in transit + */ + public void startAllMasterServers() throws IOException { + for (Map.Entry e : masterServers.entrySet()) { + startMasterServer(e.getKey()); + } + } + + /** + * Kills all tablet servers. + * Does nothing to the servers that are already dead. + * + * @throws IOException if something went wrong in transit + */ + public void killAllTabletServers() throws IOException { + for (Map.Entry e : tabletServers.entrySet()) { + killTabletServer(e.getKey()); + } + } + + /** + * Starts all the tablet servers. + * Does nothing to the servers that are already running. + * + * @throws IOException if something went wrong in transit + */ + public void startAllTabletServers() throws IOException { + for (Map.Entry e : tabletServers.entrySet()) { + startTabletServer(e.getKey()); + } + } + + /** + * Set flag for the specified master. 
+ * + * @param hp unique host and port identifying the target master + * @throws IOException if something went wrong in transit + */ + public void setMasterFlag(HostAndPort hp, String flag, String value) + throws IOException { + DaemonInfo d = getMasterServer(hp); + LOG.info("Setting flag for master at {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setSetDaemonFlag(SetDaemonFlagRequestPB.newBuilder() + .setId(d.id) + .setFlag(flag) + .setValue(value) + .build()) + .build()); + } + + /** + * Set flag for the specified tablet server. + * + * @param hp unique host and port identifying the target tablet server + * @throws IOException if something went wrong in transit + */ + public void setTServerFlag(HostAndPort hp, String flag, String value) + throws IOException { + DaemonInfo d = getTabletServer(hp); + LOG.info("Setting flag for tserver at {}", hp); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setSetDaemonFlag(SetDaemonFlagRequestPB.newBuilder() + .setId(d.id) + .setFlag(flag) + .setValue(value) + .build()) + .build()); + } + + /** + * Removes all credentials for all principals from the Kerberos credential cache. + */ + public void kdestroy() throws IOException { + LOG.info("Destroying all Kerberos credentials"); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setKdestroy(KdestroyRequestPB.getDefaultInstance()) + .build()); + } + + /** + * Re-initialize Kerberos credentials for the given username, writing them + * into the Kerberos credential cache. + * @param username the username to kinit as + */ + public void kinit(String username) throws IOException { + LOG.info("Running kinit for user {}", username); + sendRequestToCluster(ControlShellRequestPB.newBuilder() + .setKinit(KinitRequestPB.newBuilder().setUsername(username).build()) + .build()); + } + + @Override + public void close() { + shutdown(); + } + + /** + * Shuts down a Kudu cluster. + */ + public synchronized void shutdown() { + // Closing stdin should cause the control shell process to terminate. + if (miniClusterStdin != null) { + try { + miniClusterStdin.close(); + } catch (IOException e) { + LOG.info("Caught exception while closing minicluster stdin", e); + } + } + if (miniClusterStdout != null) { + try { + miniClusterStdout.close(); + } catch (IOException e) { + LOG.info("Caught exception while closing minicluster stdout", e); + } + } + if (miniClusterErrorPrinter != null) { + try { + miniClusterErrorPrinter.join(); + } catch (InterruptedException e) { + LOG.info("Caught exception while closing minicluster stderr", e); + } + } + if (miniCluster != null) { + try { + miniCluster.waitFor(); + } catch (InterruptedException e) { + LOG.warn("Minicluster process did not exit, destroying"); + miniCluster.destroy(); + } + } + } + + /** + * Returns a master server identified by an address. + * + * @param hp unique host and port identifying the server + * @return the DaemonInfo of the server + * @throws RuntimeException if the server is not found + */ + private DaemonInfo getMasterServer(HostAndPort hp) throws RuntimeException { + DaemonInfo d = masterServers.get(hp); + if (d == null) { + throw new RuntimeException(String.format("Master server %s not found", hp)); + } + return d; + } + + /** + * Returns a tablet server identified by an address. 
+ * + * @param hp unique host and port identifying the server + * @return the DaemonInfo of the server + * @throws RuntimeException if the server is not found + */ + private DaemonInfo getTabletServer(HostAndPort hp) throws RuntimeException { + DaemonInfo d = tabletServers.get(hp); + if (d == null) { + throw new RuntimeException(String.format("Tablet server %s not found", hp)); + } + return d; + } + + /** + * @return path to the mini cluster root directory + */ + public String getClusterRoot() { + return clusterRoot; + } + + /** + * @return cluster's CA certificate in DER format or an empty array + */ + public byte[] getCACertDer() throws IOException { + String masterHttpAddr = Iterables.get(Splitter.on(',') + .split(getMasterWebServerAddressesAsString()), 0); + URL url = new URL("http://" + masterHttpAddr + "/ipki-ca-cert-der"); + HttpURLConnection connection = (HttpURLConnection)url.openConnection(); + connection.setRequestMethod("GET"); + connection.connect(); + + if (connection.getResponseCode() != 200) { + connection.disconnect(); + return new byte[0]; + } + + InputStream urlData = connection.getInputStream(); + int contentSize = connection.getContentLength(); + byte[] data = new byte[contentSize]; + int numBytesRead = urlData.read(data); + if (numBytesRead != contentSize) { + connection.disconnect(); + return new byte[0]; + } + return data; + } + + /** + * Helper runnable that receives stderr and logs it along with the process' identifier. + */ + public static class ProcessInputStreamLogPrinterRunnable implements Runnable { + + private final InputStream is; + + public ProcessInputStreamLogPrinterRunnable(InputStream is) { + this.is = is; + } + + @Override + public void run() { + try { + String line; + BufferedReader in = new BufferedReader( + new InputStreamReader(is, UTF_8)); + while ((line = in.readLine()) != null) { + LOG.info(line); + } + in.close(); + } catch (Exception e) { + if (!e.getMessage().contains("Stream closed")) { + LOG.error("Caught error while reading a process' output", e); + } + } + } + } + + /** + * Builder for {@link MiniKuduCluster} + */ + public static class MiniKuduClusterBuilder { + + private int numMasterServers = 1; + private int numTabletServers = 3; + private boolean enableKerberos = false; + private final List extraTabletServerFlags = new ArrayList<>(); + private final List extraMasterServerFlags = new ArrayList<>(); + private final List locationInfo = new ArrayList<>(); + private String clusterRoot = null; + private String principal = "kudu"; + + private MiniKdcOptionsPB.Builder kdcOptionsPb = MiniKdcOptionsPB.newBuilder(); + private MiniOidcOptionsPB.Builder oidcOptionsPb = MiniOidcOptionsPB.newBuilder(); + private Common.HmsMode hmsMode = Common.HmsMode.NONE; + + public MiniKuduClusterBuilder numMasterServers(int numMasterServers) { + this.numMasterServers = numMasterServers; + return this; + } + + public MiniKuduClusterBuilder numTabletServers(int numTabletServers) { + this.numTabletServers = numTabletServers; + return this; + } + + /** + * Enables Kerberos on the mini cluster and acquire client credentials for this process. + * @return this instance + */ + public MiniKuduClusterBuilder enableKerberos() { + enableKerberos = true; + return this; + } + + public MiniKuduClusterBuilder enableHiveMetastoreIntegration() { + hmsMode = Common.HmsMode.ENABLE_METASTORE_INTEGRATION; + return this; + } + + /** + * Adds a new flag to be passed to the Tablet Server daemons on start. 
+ * @return this instance + */ + public MiniKuduClusterBuilder addTabletServerFlag(String flag) { + this.extraTabletServerFlags.add(flag); + return this; + } + + /** + * Adds a new flag to be passed to the Master daemons on start. + * @return this instance + */ + public MiniKuduClusterBuilder addMasterServerFlag(String flag) { + this.extraMasterServerFlags.add(flag); + return this; + } + + /** + * Adds one location to the minicluster configuration, consisting of a + * location and the total number of tablet servers and clients that + * can be assigned to the location. The 'location' string should be + * in the form 'location:number'. For example, + * "/L0:2" + * will add a location "/L0" that will accept up to two clients or + * tablet servers registered in it. + * @return this instance + */ + public MiniKuduClusterBuilder addLocation(String location) { + locationInfo.add(location); + return this; + } + + public MiniKuduClusterBuilder kdcTicketLifetime(String lifetime) { + this.kdcOptionsPb.setTicketLifetime(lifetime); + return this; + } + + public MiniKuduClusterBuilder kdcRenewLifetime(String lifetime) { + this.kdcOptionsPb.setRenewLifetime(lifetime); + return this; + } + + /** + * Sets the directory where the cluster's data and logs should be placed. + * @return this instance + */ + public MiniKuduClusterBuilder clusterRoot(String clusterRoot) { + this.clusterRoot = clusterRoot; + return this; + } + + public MiniKuduClusterBuilder principal(String principal) { + this.principal = principal; + return this; + } + + public MiniKuduClusterBuilder addJwks(String accountId, boolean isValid) { + this.oidcOptionsPb.addJwksOptions( + JwksOptionsPB.newBuilder() + .setAccountId(accountId) + .setIsValidKey(isValid) + .build()); + return this; + } + + /** + * Builds and starts a new {@link MiniKuduCluster} using builder state. + * @return the newly started {@link MiniKuduCluster} + * @throws IOException if something went wrong starting the cluster + */ + public MiniKuduCluster build() throws IOException { + MiniKuduCluster cluster = + new MiniKuduCluster(enableKerberos, + numMasterServers, numTabletServers, + extraTabletServerFlags, extraMasterServerFlags, locationInfo, + kdcOptionsPb.build(), clusterRoot, hmsMode, principal, + oidcOptionsPb.build()); + try { + cluster.start(); + } catch (IOException e) { + // MiniKuduCluster.close should not throw, so no need for a nested try/catch. + cluster.close(); + throw e; + } + return cluster; + } + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/AssertHelpers.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/AssertHelpers.java new file mode 100644 index 0000000000..b4250da0fb --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/AssertHelpers.java @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
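// Illustrative sketch, not part of this patch: starting a small cluster directly
// through the builder defined above. Most tests should prefer KuduTestHarness;
// the tablet server flag shown here is only an example.
import org.apache.kudu.test.cluster.MiniKuduCluster;

public class ExampleMiniClusterUsage {
  public static void main(String[] args) throws Exception {
    try (MiniKuduCluster cluster = new MiniKuduCluster.MiniKuduClusterBuilder()
        .numMasterServers(1)
        .numTabletServers(3)
        .addTabletServerFlag("--flush_threshold_mb=1")
        .build()) {
      System.out.println("Masters: " + cluster.getMasterAddressesAsString());
    }
  }
}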
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test.junit; + +import static org.junit.Assert.assertTrue; + +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class AssertHelpers { + public interface BooleanExpression { + boolean get() throws Exception; + } + + // A looping check. It's mainly useful for scanners, since writes may take a little time to show + // up. + public static void assertEventuallyTrue(String description, BooleanExpression expression, + long timeoutMillis) throws Exception { + long deadlineNanos = System.nanoTime() + timeoutMillis * 1000000; + boolean success; + + do { + success = expression.get(); + if (success) { + break; + } + Thread.sleep(50); // Sleep for 50ms + } while (System.nanoTime() < deadlineNanos); + + assertTrue(description, success); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/ResultReporter.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/ResultReporter.java new file mode 100644 index 0000000000..c16a07a6f5 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/ResultReporter.java @@ -0,0 +1,256 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test.junit; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import org.apache.http.StatusLine; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Class to report test results to the flaky test server. 
*/ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class ResultReporter { + public enum Result { + SUCCESS, + FAILURE + } + + public static class Options { + private boolean reportResults = true; + private String httpEndpoint; + private String buildTag; + private String revision; + private String hostname; + private String buildConfig; + + public Options reportResults(boolean reportResults) { + this.reportResults = reportResults; + return this; + } + + public Options httpEndpoint(String httpEndpoint) { + this.httpEndpoint = httpEndpoint; + return this; + } + + public Options buildTag(String buildTag) { + this.buildTag = buildTag; + return this; + } + + public Options revision(String revision) { + this.revision = revision; + return this; + } + + public Options hostname(String hostname) { + this.hostname = hostname; + return this; + } + + public Options buildConfig(String buildConfig) { + this.buildConfig = buildConfig; + return this; + } + } + + private static final Logger LOG = LoggerFactory.getLogger(ResultReporter.class); + private static final String KUDU_REPORT_TEST_RESULTS_VAR = "KUDU_REPORT_TEST_RESULTS"; + private static final String TEST_RESULT_SERVER_VAR = "TEST_RESULT_SERVER"; + private static final String BUILD_TAG_VAR = "BUILD_TAG"; + private static final String GIT_REVISION_VAR = "GIT_REVISION"; + private static final String BUILD_CONFIG_VAR = "BUILD_CONFIG"; + + private final Options options; + + public ResultReporter() { + this(new Options() + .reportResults(isReportingConfigured()) + .httpEndpoint(getEnvStringWithDefault(TEST_RESULT_SERVER_VAR, + "localhost:8080")) + .buildTag(System.getenv(BUILD_TAG_VAR)) + .revision(System.getenv(GIT_REVISION_VAR)) + .buildConfig(System.getenv(BUILD_CONFIG_VAR)) + .hostname(getLocalHostname())); + } + + @InterfaceAudience.LimitedPrivate("Test") + ResultReporter(Options options) { + this.options = options; + } + + private static boolean isVarSetAndNonEmpty(String name) { + String var = System.getenv(name); + return var != null && !var.equals(""); + } + + private static boolean areRequiredReportingVarsSetAndNonEmpty() { + return isVarSetAndNonEmpty(BUILD_TAG_VAR) && + isVarSetAndNonEmpty(GIT_REVISION_VAR) && + isVarSetAndNonEmpty(BUILD_CONFIG_VAR); + } + + private static String reportingVarDump() { + List vars = new ArrayList<>(); + for (String var : ImmutableList.of(TEST_RESULT_SERVER_VAR, + BUILD_TAG_VAR, + GIT_REVISION_VAR, + BUILD_CONFIG_VAR)) { + vars.add(var + ": \"" + System.getenv(var) + "\""); + } + return Joiner.on(", ").join(vars); + } + + private static boolean isReportingConfigured() { + if (getEnvIntegerWithDefault(KUDU_REPORT_TEST_RESULTS_VAR, 0) == 0) { + return false; + } + if (!areRequiredReportingVarsSetAndNonEmpty()) { + throw new IllegalStateException("Not all required variables are set: " + + reportingVarDump()); + } + return true; + } + + private static String getEnvStringWithDefault(String name, + String defaultValue) { + String value = System.getenv(name); + if (value == null || value.isEmpty()) { + return defaultValue; + } + return value; + } + + private static int getEnvIntegerWithDefault(String name, int defaultValue) { + return Integer.parseInt(getEnvStringWithDefault( + name, String.valueOf(defaultValue))); + } + + /** + * Invokes the `hostname` UNIX utility to retrieve the machine's hostname. 
+ * + * Note: this is not the same as InetAddress.getLocalHost().getHostName(), + * which performs a reverse DNS lookup and may return a different result, + * depending on the machine's networking configuration. The equivalent C++ + * code uses `hostname`, so it's important we do the same here for parity. + * + * @returns the local hostname + */ + @InterfaceAudience.LimitedPrivate("Test") + static String getLocalHostname() { + ProcessBuilder pb = new ProcessBuilder("hostname"); + try { + Process p = pb.start(); + try (InputStreamReader isr = new InputStreamReader(p.getInputStream(), UTF_8); + BufferedReader br = new BufferedReader(isr)) { + int rv = p.waitFor(); + if (rv != 0) { + throw new IllegalStateException(String.format( + "Process 'hostname' exited with exit status %d", rv)); + } + return br.readLine(); + } + } catch (InterruptedException | IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Reports a test result to the flaky test server. + * + * @param testName the display name of the JUnit test + * @param result success or failure + * @param logFile optionally, file containing log messages generated by the test + * @throws IOException if test reporting failed + */ + public void reportResult(String testName, Result result, File logFile) + throws IOException { + if (!options.reportResults) { + return; + } + + try (CloseableHttpClient client = HttpClients.createDefault()) { + final HttpPost post = new HttpPost("http://" + options.httpEndpoint + "/add_result"); + + // Set up the request with all form parts. + MultipartEntityBuilder meb = MultipartEntityBuilder.create(); + // In the backend, the BUILD_TAG field is called 'build_id', but we can't use + // that as an env variable because it'd collide with Jenkins' BUILD_ID. + meb.addTextBody("build_id", options.buildTag); + meb.addTextBody("hostname", options.hostname); + meb.addTextBody("revision", options.revision); + meb.addTextBody("build_config", options.buildConfig); + meb.addTextBody("test_name", testName); + // status=0 indicates success, status=1 indicates failure. + meb.addTextBody("status", Integer.toString(result == Result.SUCCESS ? 0 : 1)); + if (logFile != null) { + meb.addBinaryBody("log", logFile, ContentType.APPLICATION_OCTET_STREAM, + testName + ".txt.gz"); + } + post.setEntity(meb.build()); + + // Send the request and process the response. + try (CloseableHttpResponse resp = client.execute(post)) { + StatusLine sl = resp.getStatusLine(); + if (sl.getStatusCode() != 200) { + throw new IOException("Bad response from server: " + sl.getStatusCode() + ": " + + EntityUtils.toString(resp.getEntity(), UTF_8)); + } + } + } + } + + /** + * Same as {@link #reportResult(String, Result)} but never throws an exception. + * Logs a warning message on failure. 
+ */ + public void tryReportResult(String testName, Result result, File logFile) { + try { + reportResult(testName, result, logFile); + } catch (IOException ex) { + LOG.warn("Failed to record test result for {} as {}", testName, result, ex); + } + } + + /** + * @return whether result reporting is enabled for this reporter + */ + public boolean isReportingEnabled() { + return options.reportResults; + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/RetryRule.java b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/RetryRule.java new file mode 100644 index 0000000000..f4ac3d0723 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/main/java/org/apache/kudu/test/junit/RetryRule.java @@ -0,0 +1,257 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test.junit; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.BufferedReader; +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.HashSet; +import java.util.Set; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; +import org.junit.rules.TestRule; +import org.junit.runner.Description; +import org.junit.runners.model.Statement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.test.CapturingToFileLogAppender; + +/** + * JUnit rule to retry failed tests. + * + * Uses the KUDU_FLAKY_TEST_LIST and KUDU_RETRY_ALL_FAILED_TESTS environment + * variables to determine whether a test should be retried, and the + * KUDU_FLAKY_TEST_ATTEMPTS environment variable to determine how many times. + * + * By default will use ResultReporter to report success/failure of each test + * attempt to an external server; this may be skipped if desired. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class RetryRule implements TestRule { + private static final Logger LOG = LoggerFactory.getLogger(RetryRule.class); + private static final int DEFAULT_RETRY_COUNT = 0; + private static final Set FLAKY_TESTS = new HashSet<>(); + + private final int retryCount; + private final ResultReporter reporter; + + static { + // Initialize the flaky test set if it exists. The file will have one test + // name per line. 
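+    // Each line is expected to hold a fully-qualified test name, e.g.
+    // "org.apache.kudu.SomeTest.someMethod" (hypothetical), matching the
+    // className + "." + methodName form built in apply() below.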
+ String value = System.getenv("KUDU_FLAKY_TEST_LIST"); + if (value != null) { + try (BufferedReader br = Files.newBufferedReader(Paths.get(value), UTF_8)) { + for (String l = br.readLine(); l != null; l = br.readLine()) { + FLAKY_TESTS.add(l); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + public RetryRule() { + this(DEFAULT_RETRY_COUNT, /*skipReporting=*/ false); + } + + @InterfaceAudience.LimitedPrivate("Test") + RetryRule(int retryCount, boolean skipReporting) { + Preconditions.checkArgument(retryCount >= 0); + this.retryCount = retryCount; + this.reporter = skipReporting ? null : new ResultReporter(); + } + + private static boolean retryAllTests() { + String value = System.getenv("KUDU_RETRY_ALL_FAILED_TESTS"); + return value != null && !value.isEmpty(); + } + + private static boolean retryThisTest(String humanReadableTestName) { + return FLAKY_TESTS.contains(humanReadableTestName); + } + + private static int getActualRetryCount() { + String value = System.getenv("KUDU_FLAKY_TEST_ATTEMPTS"); + if (value == null) { + return DEFAULT_RETRY_COUNT; + } + try { + int val = Integer.parseInt(value); + if (val < 1) { + throw new NumberFormatException( + String.format("expected non-zero positive value, got %d", val)); + } + + // Convert from number of "attempts" to number of "retries". + return Integer.parseInt(value) - 1; + } catch (NumberFormatException e) { + LOG.warn("Could not parse KUDU_FLAKY_TEST_ATTEMPTS, using default value ({})", + DEFAULT_RETRY_COUNT, e); + return DEFAULT_RETRY_COUNT; + } + } + + @Override + public Statement apply(Statement base, Description description) { + String humanReadableTestName = + description.getClassName() + "." + description.getMethodName(); + + // Retrying and reporting are independent; the RetryStatement is used if + // either is enabled. We'll retry the test under one of the following + // circumstances: + // + // 1. The RetryRule was constructed with an explicit retry count. + // 2. We've been asked to retry all tests via KUDU_RETRY_ALL_FAILED_TESTS. + // 3. We've been asked to retry this test via KUDU_FLAKY_TEST_LIST. + // + // In the latter two cases, we consult KUDU_FLAKY_TEST_ATTEMPTS for the retry count. + boolean retryExplicit = retryCount != DEFAULT_RETRY_COUNT; + boolean retryAll = retryAllTests(); + boolean retryThis = retryThisTest(humanReadableTestName); + if (retryExplicit || retryAll || retryThis || reporter != null) { + int actualRetryCount = (retryAll || retryThis) ? getActualRetryCount() : retryCount; + LOG.info("Creating RetryStatement {} result reporter and retry count of {} ({})", + reporter != null ? "with" : "without", + actualRetryCount, + retryExplicit ? "explicit" : + retryAll ? "all tests" : + retryThis ? 
"this test" : "no retries"); + return new RetryStatement(base, actualRetryCount, reporter, humanReadableTestName); + } + return base; + } + + private static class RetryStatement extends Statement { + + private final Statement base; + private final int retryCount; + private final ResultReporter reporter; + private final String humanReadableTestName; + + RetryStatement(Statement base, int retryCount, ResultReporter reporter, + String humanReadableTestName) { + this.base = base; + this.retryCount = retryCount; + this.reporter = reporter; + this.humanReadableTestName = humanReadableTestName; + } + + private void report(ResultReporter.Result result, File logFile) { + reporter.tryReportResult(humanReadableTestName, result, logFile); + } + + private boolean wasClockUnsynchronized(File output) { + ProcessBuilder pb = new ProcessBuilder(ImmutableList.of( + "zgrep", "-q", "Clock considered unsynchronized", output.getPath())); + try { + Process p = pb.start(); + return p.waitFor() == 0; + } catch (InterruptedException | IOException e) { + throw new RuntimeException(e); + } + } + + private void doOneAttemptAndReport(int attempt) throws Throwable { + try (CapturingToFileLogAppender capturer = + new CapturingToFileLogAppender(/*useGzip=*/ true)) { + try { + try (Closeable c = capturer.attach()) { + base.evaluate(); + } + + // The test succeeded. + // + // We skip the file upload; this saves space and network bandwidth, + // and we don't need the logs of successful tests. + report(ResultReporter.Result.SUCCESS, /*logFile=*/ null); + return; + } catch (org.junit.internal.AssumptionViolatedException e) { + // The test was skipped because an assumption failed. + // A test for which an assumption fails should not generate a test case failure. + // + // We skip the file upload; this saves space and network bandwidth, + // and we don't need the logs of skipped tests. + report(ResultReporter.Result.SUCCESS, /*logFile=*/ null); + return; + } catch (Throwable t) { + // The test failed. + // + // Before reporting, capture the failing exception too. + try (Closeable c = capturer.attach()) { + LOG.error("{}: failed attempt {}", humanReadableTestName, attempt, t); + } + capturer.finish(); + + // We sometimes have flaky infrastructure where NTP is broken. In that + // case do not report the test failure. + File output = capturer.getOutputFile(); + if (wasClockUnsynchronized(output)) { + LOG.info("Not reporting test that failed due to NTP issues."); + } else { + report(ResultReporter.Result.FAILURE, output); + } + throw t; + } + } + } + + private void doOneAttempt(int attempt) throws Throwable { + try { + base.evaluate(); + } catch (org.junit.internal.AssumptionViolatedException e) { + // The test was skipped because an assumption failed. + // A test for which an assumption fails should not generate a test case failure. 
+ LOG.warn("{}: skipped due to failed assumption", humanReadableTestName, e); + } catch (Throwable t) { + LOG.error("{}: failed attempt {}", humanReadableTestName, attempt, t); + throw t; + } + } + + @Override + public void evaluate() throws Throwable { + Throwable lastException; + int attempt = 0; + do { + attempt++; + try { + if (reporter != null && reporter.isReportingEnabled()) { + doOneAttemptAndReport(attempt); + } else { + doOneAttempt(attempt); + } + return; + } catch (Throwable t) { + lastException = t; + } + } while (attempt <= retryCount); + LOG.error("{}: giving up after {} attempts", humanReadableTestName, attempt); + throw lastException; + } + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/TestCapturingToFileLogAppender.java b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/TestCapturingToFileLogAppender.java new file mode 100644 index 0000000000..2c76c5b01f --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/TestCapturingToFileLogAppender.java @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.kudu.test; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.BufferedReader; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPInputStream; + +import com.google.common.base.Joiner; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.kudu.test.junit.RetryRule; + +public class TestCapturingToFileLogAppender { + private static final Logger LOG = + LoggerFactory.getLogger(TestCapturingToFileLogAppender.class); + + private static final String MAGIC_STRING = "hello world!"; + + @Rule + public RetryRule retryRule = new RetryRule(); + + private String readAllFromBufferedReader(BufferedReader br) throws IOException { + List output = new ArrayList<>(); + for (String line = br.readLine(); line != null; line = br.readLine()) { + output.add(line); + } + return Joiner.on("\n").join(output); + } + + private String readAllFromFile(File fileName) throws IOException { + try (InputStream fis = new FileInputStream(fileName); + Reader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { + return readAllFromBufferedReader(br); + } + } + + private String readAllFromGzippedFile(File fileName) throws IOException { + try (InputStream fis = new FileInputStream(fileName); + InputStream gzis = new GZIPInputStream(fis); + Reader isr = new InputStreamReader(gzis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { + return readAllFromBufferedReader(br); + } + } + + @Test + public void testLog() throws IOException { + File outputFile; + try (CapturingToFileLogAppender capturer = + new CapturingToFileLogAppender(/*useGzip=*/ false)) { + outputFile = capturer.getOutputFile(); + assertTrue(outputFile.exists()); + + // Log a magic string and flush the output file. + try (Closeable c = capturer.attach()) { + LOG.info(MAGIC_STRING); + } + capturer.finish(); + + // Read the magic string out of the output file. + String captured = readAllFromFile(outputFile); + assertNotNull(captured); + assertTrue(captured.contains(MAGIC_STRING)); + } + assertFalse(outputFile.exists()); + } + + @Test + public void testLogGzipped() throws IOException { + File outputFile; + try (CapturingToFileLogAppender capturer = + new CapturingToFileLogAppender(/*useGzip=*/ true)) { + outputFile = capturer.getOutputFile(); + assertTrue(outputFile.exists()); + + // Log a magic string and flush the output file. + try (Closeable c = capturer.attach()) { + LOG.info(MAGIC_STRING); + } + capturer.finish(); + + // Read the magic string out of the output file. + String captured = readAllFromGzippedFile(outputFile); + assertNotNull(captured); + assertTrue(captured.contains(MAGIC_STRING)); + } + assertFalse(outputFile.exists()); + } + + @Test + public void testLogException() throws IOException { + File outputFile; + try (CapturingToFileLogAppender capturer = + new CapturingToFileLogAppender(/*useGzip=*/ false)) { + outputFile = capturer.getOutputFile(); + assertTrue(outputFile.exists()); + + // Log a magic string and flush the output file. 
+ try (Closeable c = capturer.attach()) { + LOG.error("Saw exception", new Exception(MAGIC_STRING)); + } + capturer.finish(); + + // Read the magic string out of the output file. + String captured = readAllFromFile(outputFile); + assertNotNull(captured); + assertTrue(captured.contains("java.lang.Exception: " + MAGIC_STRING)); + } + assertFalse(outputFile.exists()); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/TestMiniKuduCluster.java b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/TestMiniKuduCluster.java new file mode 100644 index 0000000000..db11df73ab --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/TestMiniKuduCluster.java @@ -0,0 +1,212 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test; + +import static org.apache.kudu.test.junit.AssertHelpers.assertEventuallyTrue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.net.Socket; +import java.util.Arrays; + +import com.google.protobuf.ByteString; +import org.junit.Rule; +import org.junit.Test; + +import org.apache.kudu.client.HostAndPort; +import org.apache.kudu.client.KuduClient; +import org.apache.kudu.client.KuduClient.KuduClientBuilder; +import org.apache.kudu.client.ListTablesResponse; +import org.apache.kudu.client.TimeoutTracker; +import org.apache.kudu.test.cluster.FakeDNS; +import org.apache.kudu.test.cluster.MiniKuduCluster; +import org.apache.kudu.test.junit.AssertHelpers.BooleanExpression; +import org.apache.kudu.test.junit.RetryRule; + +public class TestMiniKuduCluster { + + private static final int NUM_TABLET_SERVERS = 3; + private static final int NUM_MASTERS = 1; + private static final long SLEEP_TIME_MS = 10000; + + @Rule + public RetryRule retryRule = new RetryRule(); + + @Rule + public KuduTestHarness harness; + + @Test(timeout = 50000) + public void test() throws Exception { + try (MiniKuduCluster cluster = new MiniKuduCluster.MiniKuduClusterBuilder() + .numMasterServers(NUM_MASTERS) + .numTabletServers(NUM_TABLET_SERVERS) + .build()) { + assertEquals(NUM_MASTERS, cluster.getMasterServers().size()); + assertEquals(NUM_TABLET_SERVERS, cluster.getTabletServers().size()); + + { + // Kill the master. + HostAndPort masterHostPort = cluster.getMasterServers().get(0); + testHostPort(masterHostPort, true); + cluster.killMasterServer(masterHostPort); + + testHostPort(masterHostPort, false); + + // Restart the master. 
+ cluster.startMasterServer(masterHostPort); + + // Test we can reach it. + testHostPort(masterHostPort, true); + + // Pause master. + cluster.pauseMasterServer(masterHostPort); + // Pausing master again doesn't do anything. + cluster.pauseMasterServer(masterHostPort); + + // Resume master. + cluster.resumeMasterServer(masterHostPort); + // Resuming master while it's not paused doesn't do anything. + cluster.resumeMasterServer(masterHostPort); + } + + { + // Kill the first TS. + HostAndPort tsHostPort = cluster.getTabletServers().get(0); + testHostPort(tsHostPort, true); + cluster.killTabletServer(tsHostPort); + + testHostPort(tsHostPort, false); + + // Restart it. + cluster.startTabletServer(tsHostPort); + + testHostPort(tsHostPort, true); + + // Pause the first TS. + cluster.pauseTabletServer(tsHostPort); + // Pausing master again doesn't do anything. + cluster.pauseTabletServer(tsHostPort); + + // Resume test first TS. + cluster.resumeTabletServer(tsHostPort); + // Resuming master while it's not paused doesn't do anything. + cluster.resumeTabletServer(tsHostPort); + } + } + } + + @Test(timeout = 50000) + public void testJwtAuthn() throws Exception { + try (MiniKuduCluster cluster = createClusterForJwtTest(); + KuduClient client = new KuduClientBuilder(cluster.getMasterAddressesAsString()).build()) { + // It may take some time for the catalog manager to initialize + // and have IPKI CA certificate ready. + assertEventuallyTrue( + "valid cluster IPKI CA certificate captured", + new BooleanExpression() { + @Override + public boolean get() throws Exception { + return cluster.getCACertDer().length != 0; + } + }, + 10000/*timeoutMillis*/); + byte[] caCert = cluster.getCACertDer(); + assertNotEquals(0, caCert.length); + + String jwt = cluster.createJwtFor("account-id", "kudu", true); + assertNotNull(jwt); + client.jwt(jwt); + client.trustedCertificates(Arrays.asList(ByteString.copyFrom(caCert))); + + // A simple call to make sure the client can connect to the cluster. + // That assumes an RPC connection to the master has been successfully + // negotiated. + assertTrue(client.getTablesList().getTablesList().isEmpty()); + } + } + + @Test(timeout = 50000) + public void testKerberos() throws Exception { + FakeDNS.getInstance().install(); + try (MiniKuduCluster cluster = new MiniKuduCluster.MiniKuduClusterBuilder() + .numMasterServers(NUM_MASTERS) + .numTabletServers(NUM_TABLET_SERVERS) + .enableKerberos() + .build(); + KuduClient client = new KuduClientBuilder(cluster.getMasterAddressesAsString()).build()) { + ListTablesResponse resp = client.getTablesList(); + assertTrue(resp.getTablesList().isEmpty()); + assertNull(client.getHiveMetastoreConfig()); + } + } + + @Test(timeout = 100000) + public void testHiveMetastoreIntegration() throws Exception { + try (MiniKuduCluster cluster = new MiniKuduCluster.MiniKuduClusterBuilder() + .numMasterServers(NUM_MASTERS) + .numTabletServers(NUM_TABLET_SERVERS) + .enableHiveMetastoreIntegration() + .build(); + KuduClient client = new KuduClientBuilder(cluster.getMasterAddressesAsString()).build()) { + assertNotNull(client.getHiveMetastoreConfig()); + } + } + + /** + * Test whether the specified host and port is open or closed, waiting up to a certain time. 
+ * @param hp the host and port to test + * @param testIsOpen true if we should want it to be open, false if we want it closed + */ + private static void testHostPort(HostAndPort hp, + boolean testIsOpen) throws InterruptedException { + TimeoutTracker tracker = new TimeoutTracker(); + while (tracker.getElapsedMillis() < SLEEP_TIME_MS) { + try { + Socket socket = new Socket(hp.getHost(), hp.getPort()); + socket.close(); + if (testIsOpen) { + return; + } + } catch (IOException e) { + if (!testIsOpen) { + return; + } + } + Thread.sleep(200); + } + fail("HostAndPort " + hp + " is still " + (testIsOpen ? "closed " : "open")); + } + + private MiniKuduCluster createClusterForJwtTest() throws IOException { + return new MiniKuduCluster.MiniKuduClusterBuilder() + .numMasterServers(NUM_MASTERS) + .numTabletServers(0) + .addMasterServerFlag("--enable-jwt-token-auth") + .addMasterServerFlag("--rpc-trace-negotiation") + .addMasterServerFlag("--rpc-authentication=required") + .addMasterServerFlag("--rpc-encryption=required") + .enableKerberos() + .addJwks("account-id", true) + .build(); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/cluster/TestKuduBinaryJarExtractor.java b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/cluster/TestKuduBinaryJarExtractor.java new file mode 100644 index 0000000000..ad56b87c29 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/cluster/TestKuduBinaryJarExtractor.java @@ -0,0 +1,165 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.kudu.test.cluster;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.nio.file.FileSystem;
+import java.nio.file.FileSystems;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import com.google.gradle.osdetector.OsDetector;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.kudu.test.junit.RetryRule;
+
+public class TestKuduBinaryJarExtractor {
+
+  private static final OsDetector DETECTOR = new OsDetector();
+  private static final Logger LOG = LoggerFactory.getLogger(TestKuduBinaryJarExtractor.class);
+
+  @Rule
+  public RetryRule retryRule = new RetryRule();
+
+  private Path createKuduBinaryJar(final String os) throws IOException, URISyntaxException {
+    String baseName = "fake-" + os + "-kudu-binary";
+    Path tempDir = Files.createTempDirectory(baseName);
+
+    // convert the filename to a URI
+    final Path path = Paths.get(tempDir.toString(), baseName + ".jar");
+    LOG.info("Creating fake kudu binary jar at {}", path.toString());
+    final URI uri = URI.create("jar:file:" + path.toUri().getPath());
+
+    final Map<String, String> env = new HashMap<>();
+    env.put("create", "true");
+    try (FileSystem zipFs = FileSystems.newFileSystem(uri, env)) {
+      final Path root = zipFs.getPath("/");
+      final Path src =
+          Paths.get(TestKuduBinaryJarExtractor.class.getResource("/fake-kudu-binary").toURI());
+
+      Files.walkFileTree(src, new SimpleFileVisitor<Path>() {
+        @Override
+        public FileVisitResult visitFile(Path file,
+            BasicFileAttributes attrs) throws IOException {
+          final Path dest = zipFs.getPath(root.toString(),
+              src.relativize(file).toString());
+          Files.copy(file, dest, StandardCopyOption.REPLACE_EXISTING);
+          return FileVisitResult.CONTINUE;
+        }
+
+        @Override
+        public FileVisitResult preVisitDirectory(Path dir,
+            BasicFileAttributes attrs) throws IOException {
+          final Path dirToCreate = zipFs.getPath(root.toString(),
+              src.relativize(dir).toString());
+          if (Files.notExists(dirToCreate)) {
+            LOG.debug("Creating directory {}", dirToCreate);
+            Files.createDirectories(dirToCreate);
+          }
+          return FileVisitResult.CONTINUE;
+        }
+      });
+
+      Path metaInf = zipFs.getPath(root.toString(), "META-INF");
+      Files.createDirectory(metaInf);
+      // Customize the properties file to enable positive and negative test scenarios.
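+      // KuduBinaryJarExtractor presumably keys off these properties (artifact.os/arch in
+      // particular) when deciding whether the jar matches the running platform; see
+      // writeProperties() below and testIsKuduBinaryJarOnClasspath().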
+ Path propsPath = zipFs.getPath(metaInf.toString(), "apache-kudu-test-binary.properties"); + try (OutputStream propsOutputStream = Files.newOutputStream(propsPath)) { + writeProperties(os, propsOutputStream); + } + } + return path; + } + + private static void writeProperties(String os, OutputStream out) throws IOException { + Properties properties = new Properties(); + properties.setProperty("format.version", "1"); + properties.setProperty("artifact.version", "1.9.0-SNAPSHOT"); + properties.setProperty("artifact.prefix", "apache-kudu-1.9.0-SNAPSHOT"); + properties.setProperty("artifact.os", os); + properties.setProperty("artifact.arch", DETECTOR.getArch()); + properties.store(out, "test"); + } + + /** + * Create a ClassLoader. The parent of the ClassLoader will be the current thread context + * ClassLoader, if not set, or the ClassLoader that loaded this test class if not. + * @param jars an array of jars to include in the child class loader. + */ + private ClassLoader createChildClassLoader(URL[] jars) { + ClassLoader parent = Thread.currentThread().getContextClassLoader(); + if (parent == null) { + parent = TestKuduBinaryJarExtractor.class.getClassLoader(); + } + assertNotNull(parent); + return URLClassLoader.newInstance(jars, parent); + } + + @Test + public void testExtractJar() throws IOException, URISyntaxException { + URI binaryJar = createKuduBinaryJar("osx").toUri(); + + binaryJar = URI.create("jar:" + binaryJar.toString()); + Path extractedDir = KuduBinaryJarExtractor.extractJar(binaryJar, + "apache-kudu-1.9.0-SNAPSHOT", + Files.createTempDirectory("kudu-test")); + Path extractedBinDir = Paths.get(extractedDir.toString(), "bin"); + assertTrue(extractedBinDir.toFile().exists()); + + Path kuduTserver = Paths.get(extractedBinDir.toString(), "kudu-tserver"); + assertTrue(Files.exists(kuduTserver)); + } + + @Test + public void testIsKuduBinaryJarOnClasspath() throws IOException, URISyntaxException { + KuduBinaryJarExtractor extractor = new KuduBinaryJarExtractor(); + assertFalse(extractor.isKuduBinaryJarOnClasspath()); + + boolean isOsX = System.getProperty("os.name").replaceAll("\\s", "").equalsIgnoreCase("macosx"); + + Path binaryJar = createKuduBinaryJar(isOsX ? "linux" : "osx"); + ClassLoader childLoader = createChildClassLoader(new URL[] { binaryJar.toUri().toURL() }); + Thread.currentThread().setContextClassLoader(childLoader); + assertFalse(extractor.isKuduBinaryJarOnClasspath()); + + binaryJar = createKuduBinaryJar(!isOsX ? "linux" : "osx"); + childLoader = createChildClassLoader(new URL[] { binaryJar.toUri().toURL() }); + Thread.currentThread().setContextClassLoader(childLoader); + assertTrue(extractor.isKuduBinaryJarOnClasspath()); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/junit/TestResultReporter.java b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/junit/TestResultReporter.java new file mode 100644 index 0000000000..3d96c8bcbe --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/junit/TestResultReporter.java @@ -0,0 +1,206 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.kudu.test.junit;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import javax.servlet.MultipartConfigElement;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import javax.servlet.http.Part;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import org.apache.commons.io.IOUtils;
+import org.eclipse.jetty.server.Server;
+import org.eclipse.jetty.servlet.ServletContextHandler;
+import org.eclipse.jetty.servlet.ServletHolder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Unit test for ResultReporter. */
+public class TestResultReporter {
+  private static final Logger LOGGER = LoggerFactory.getLogger(TestResultReporter.class);
+  private static final String BIND_ADDR = "127.0.0.1";
+  private Server server;
+  private MockFlakyTestServlet flakyTestServlet;
+
+  @Rule
+  public RetryRule retryRule = new RetryRule();
+
+  /** Record of a specific test run. */
+  private static class TestRecord {
+    public String testName;
+    public String buildTag;
+    public String revision;
+    public String hostname;
+    public String buildConfig;
+    public int status;
+    public String log;
+
+    public TestRecord(Map<String, String> params) {
+      testName = params.get("test_name");
+      buildTag = params.get("build_id");
+      revision = params.get("revision");
+      hostname = params.get("hostname");
+      buildConfig = params.get("build_config");
+      status = Integer.parseInt(params.get("status"));
+      log = params.get("log");
+    }
+
+    @Override
+    public String toString() {
+      List<String> required = ImmutableList.of(
+          testName, buildTag, revision, hostname, buildConfig, Integer.toString(status));
+      List<String> all = new ArrayList<>(required);
+      if (log != null) {
+        all.add(log);
+      }
+      return Joiner.on(" ").join(all);
+    }
+  }
+
+  /**
+   * Mock implementation of the flaky test server.
+   *
+   * Must be a servlet (not just a Jetty handler) to support multipart forms.
+   */
+  private static class MockFlakyTestServlet extends HttpServlet {
+    private static final Logger LOG = LoggerFactory.getLogger(MockFlakyTestServlet.class);
+    private static final long serialVersionUID = 1L;
+    private final transient List<TestRecord> records = new ArrayList<>();
+
+    List<TestRecord> getRecords() {
+      return records;
+    }
+
+    @Override
+    protected void doPost(HttpServletRequest request,
+        HttpServletResponse response) throws IOException, ServletException {
+      LOG.debug("Handling request {}: ", request);
+
+      // Process the form parts into key/value pairs.
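+      // These parts correspond to the fields posted by ResultReporter#reportResult:
+      // build_id, hostname, revision, build_config, test_name, status, and (optionally) log.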
+      Map<String, String> params = new HashMap<>();
+      for (Part p : request.getParts()) {
+        params.put(p.getName(), IOUtils.toString(p.getInputStream(), UTF_8));
+      }
+
+      // We're done processing the request.
+      records.add(new TestRecord(params));
+      response.setContentType("text/html; charset=utf-8");
+      response.setStatus(HttpServletResponse.SC_OK);
+    }
+  }
+
+  @Before
+  public void setup() throws Exception {
+    flakyTestServlet = new MockFlakyTestServlet();
+
+    // This Enterprise Java nonsense is to enable multipart form submission. The
+    // servlet is configured to only spill parts to disk if they exceed 1 MB in
+    // size, which isn't a concern for this test.
+    ServletContextHandler context = new ServletContextHandler(ServletContextHandler.SESSIONS);
+    context.setContextPath("/");
+    ServletHolder holder = new ServletHolder(flakyTestServlet);
+    holder.getRegistration().setMultipartConfig(new MultipartConfigElement(
+        "", // location
+        1024 * 1024, // maxFileSize
+        1024 * 1024, // maxRequestSize
+        1024 * 1024)); // fileSizeThreshold
+    context.addServlet(holder, "/*");
+
+    server = new Server(new InetSocketAddress(BIND_ADDR, 0));
+    server.setHandler(context);
+    server.start();
+  }
+
+  @After
+  public void teardown() throws Exception {
+    server.stop();
+    server.join();
+  }
+
+  @Test
+  public void testRoundTrip() throws IOException {
+    final ResultReporter.Options options = new ResultReporter.Options();
+    assertNotNull(server);
+    assertTrue(server.isStarted());
+    assertNotNull(server.getURI());
+    options.httpEndpoint(BIND_ADDR + ":" + server.getURI().getPort())
+        .buildTag("shark")
+        .revision("do")
+        .hostname("do-do")
+        .buildConfig("do-do-do");
+    ResultReporter.Result[] expectedResults = {
+        ResultReporter.Result.SUCCESS, ResultReporter.Result.FAILURE };
+    String[] testNames = { "baby", "mommy", "daddy"};
+    String logFormat = "%s: a log message";
+    ResultReporter reporter = new ResultReporter(options);
+    int expectedRecords = 0;
+    for (ResultReporter.Result result : expectedResults) {
+      for (String testName : testNames) {
+        File tempLogFile = null;
+        if (result == ResultReporter.Result.FAILURE) {
+          tempLogFile = File.createTempFile("test_log", ".txt");
+          tempLogFile.deleteOnExit();
+          FileOutputStream fos = new FileOutputStream(tempLogFile);
+          IOUtils.write(String.format(logFormat, testName), fos, UTF_8);
+        }
+        reporter.reportResult(testName, result, tempLogFile);
+        expectedRecords++;
+      }
+    }
+    assertEquals(expectedRecords, flakyTestServlet.getRecords().size());
+    Iterator<TestRecord> iterator = flakyTestServlet.getRecords().iterator();
+    for (ResultReporter.Result result : expectedResults) {
+      for (String testName : testNames) {
+        assertTrue(iterator.hasNext());
+        TestRecord record = iterator.next();
+        LOGGER.info(record.toString());
+        assertEquals(testName, record.testName);
+        assertEquals(result == ResultReporter.Result.SUCCESS ? 0 : 1, record.status);
+        assertEquals(result == ResultReporter.Result.FAILURE ?
+            String.format(logFormat, testName) : null, record.log);
+      }
+    }
+  }
+
+  @Test
+  public void testHostName() {
+    // Just tests that this doesn't throw an exception.
+ LOGGER.info(ResultReporter.getLocalHostname()); + } +} diff --git a/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/junit/TestRetryRule.java b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/junit/TestRetryRule.java new file mode 100644 index 0000000000..9617e71d13 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/test/java/org/apache/kudu/test/junit/TestRetryRule.java @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.kudu.test.junit; + +import static org.junit.Assert.fail; +import static org.junit.Assume.assumeTrue; + +import org.junit.Rule; +import org.junit.Test; + +public class TestRetryRule { + private static final int MAX_FAILURES = 2; + + // Track the number of times testRetry() failed, and was re-run, by the RetryRule so that we + // can eventually cause it to succeed. Each failure increments this variable before it throws + // an assertion exception. + private int failures = 0; + + // We skip flaky test reporting for this test because it is designed to fail. + @Rule + public RetryRule retryRule = new RetryRule(MAX_FAILURES, /*skipReporting=*/ true); + + // Ensure that the RetryRule prevents test failures as long as we don't exceed MAX_FAILURES + // failures. + @Test + public void testRetry() { + if (failures < MAX_FAILURES) { + failures++; + fail(String.format("%d failures", failures)); + } + // Pass the test (by not throwing) on the final retry. + } + + // Ensure that the RetryRule does not cause test failures when + // assumeTrue and other similar assumption statements are used. + @Test + public void testAssumeTrue() { + assumeTrue(false); + fail("This is unreachable!"); + } + +} diff --git a/java-scala-spark4/kudu-test-utils/src/test/resources/fake-kudu-binary/apache-kudu-1.9.0-SNAPSHOT/bin/kudu-tserver b/java-scala-spark4/kudu-test-utils/src/test/resources/fake-kudu-binary/apache-kudu-1.9.0-SNAPSHOT/bin/kudu-tserver new file mode 100644 index 0000000000..d9a03b3ac4 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/test/resources/fake-kudu-binary/apache-kudu-1.9.0-SNAPSHOT/bin/kudu-tserver @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +fake kudu-tserver... \ No newline at end of file diff --git a/java-scala-spark4/kudu-test-utils/src/test/resources/log4j2.properties b/java-scala-spark4/kudu-test-utils/src/test/resources/log4j2.properties new file mode 100644 index 0000000000..22762a1560 --- /dev/null +++ b/java-scala-spark4/kudu-test-utils/src/test/resources/log4j2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +status = error +name = PropertiesConfig +appenders = console + +appender.console.type = Console +appender.console.name = STDOUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n + +rootLogger.level = info +rootLogger.appenderRefs = stdout +rootLogger.appenderRef.stdout.ref = STDOUT + +logger.kudu.name = org.apache.kudu +logger.kudu.level = debug diff --git a/java-scala-spark4/settings.gradle b/java-scala-spark4/settings.gradle new file mode 100644 index 0000000000..ebe587ab84 --- /dev/null +++ b/java-scala-spark4/settings.gradle @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file contains the configuration of the project hierarchy. +// Mainly we just define what subprojects are in the build. + +rootProject.name = "kudu-parent" +include "kudu-backup" +include "kudu-backup-common" +include "kudu-backup-tools" +include "kudu-client" +include "kudu-hive" +include "kudu-jepsen" +include "kudu-proto" +include "kudu-spark" +include "kudu-spark-tools" +include "kudu-subprocess" +include "kudu-test-utils"