diff --git a/pom.xml b/pom.xml index 5f03a49bc..6fef7c6ba 100644 --- a/pom.xml +++ b/pom.xml @@ -222,7 +222,37 @@ args4j args4j - 2.0.9 + 2.33 + + + + log4j + log4j + 1.2.17 + + + + javax.xml.bind + jaxb-api + 2.2.2 + + + + xml-apis + xml-apis + 1.3.04 + + + + stax + stax-api + 1.0.1 + + + + javax.servlet + servlet-api + 2.5 @@ -231,6 +261,12 @@ ${hyracks.version} + + org.apache.hyracks + hyracks-util + ${hyracks.version} + + org.apache.hyracks hyracks-control-common @@ -328,12 +364,24 @@ jar + + org.apache.hadoop + hadoop-common + 2.7.0 + + org.apache.hadoop hadoop-hdfs 2.7.0 + + org.apache.hadoop + hadoop-mapreduce-client-core + 2.7.0 + + ant ant-trax @@ -383,6 +431,12 @@ 2.8.4 + + com.fasterxml.jackson.core + jackson-databind + 2.8.4 + + commons-io commons-io @@ -395,6 +449,18 @@ 1.4 + + commons-lang + commons-lang + 2.6 + + + + com.google.guava + guava + 11.0.2 + + junit junit @@ -670,7 +736,7 @@ UTF-8 UTF-8 - 0.3.0 + 0.3.2-SNAPSHOT 0.11 diff --git a/src/site/apt/development_contribution.apt b/src/site/apt/development_contribution.apt index f767cfda6..c64bad202 100644 --- a/src/site/apt/development_contribution.apt +++ b/src/site/apt/development_contribution.apt @@ -17,7 +17,7 @@ Contributing Code The following steps outline how to submit code to the VXQuery community for inclusion. Please read the Developer {{{http://vxquery.apache.org/developer_get_started.html}Get Started}} Guide - to answer question about getting start as a developer. + to answer questions about getting started as a developer. VXQuery community supports two methods for contributing code to the project. [[1]] <> @@ -55,13 +55,19 @@ Contributing Code VXQuery uses the following convention when creating a branch: authors_username/topic_or_issue (examples: prestonc/vxquery_142 or tillw/group_by_clause). The following branch name helps keep branches separated and keeps it easy to determine the author and topic. 
+ The authors_username is very important when reviewing a developer's code on your own machine. +--- +git checkout master +git pull +git branch prestonc/vxquery_142 +git checkout prestonc/vxquery_142 +--- * Make the change. :-) - * Add new tests. (optional) If the change is not covered in the XQTS, please create a new test in the VXQuery test suite @@ -82,12 +88,19 @@ Contributing Code Remove an extra debug code and verify the patch only includes code for the change. + * Commit and push code. + + Commit changes to the branch and push to github. + * Create a github Pull Request. Once the work has been tested, a pull request can be created for the change branch. Please use the Apache VXQuery master as branch to compare the change branch. - The branch should be up-to-date with the lastest Apache VXQuery master branch. + The branch should be up-to-date with the latest Apache VXQuery master branch. + + Git rebase is a nice option for keeping your code up-to-date with master without messing up the Pull Request. + (Merge will show changes in master as your changes on your branch.) * Post your Pull Request. @@ -96,16 +109,41 @@ Contributing Code At least one other member of the community should review the change. If there is any feedback, address this and repeat the posting process. + * Update your Pull Request. + + Update your change to address any comments from reviewers. + + * Prepare your change for merge. + + Squash your changes into a single commit with a nice commit message. + The commit message's first line should be less than 50 characters and any additional comments + are included below a blank line. + +--- +VXQUERY-142: fn:doc support for source files + +The fn:doc function now supports reading files defined in the test suite XML source tag. +--- + + Git rebase has an option of merging commits into a single commit that works nicely for squashing your changes. 
+ ({{{http://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html}git ready}} has a nice example.) + However, this will not work if you happened to use merge when updating to the latest master branch. + + ** Code Reviewer * Review the Pull Request. - Post inline or global comments for the developer. + Post in-line or global comments for the developer. Be polite in your suggestions. Guide the developer to bring the code up to VXQuery's code standards. * Double check the VXQuery and XQTS tests. + Each Pull Request automatically triggers a {{{https://asterix-jenkins.ics.uci.edu/job/vxquery-pr/}VXQuery Jenkins}} + job that runs all the tests. + + ** VXQuery Committer (author or sponsor of the change) @@ -114,9 +152,39 @@ Contributing Code they are pushing on to the repository. Often the committer will also be the reviewer for non-committer changes. + * Add ASF as a git remote (first time committers). + + Create a git remote for the ASF repository. {{{https://git-wip-us.apache.org/repos/asf/vxquery.git}}} + * Double check the VXQuery and XQTS tests. + A {{{https://asterix-jenkins.ics.uci.edu/job/vxquery-pr/}VXQuery Jenkins}} instance has been set up to + check the last ten Pull Requests. + The Pull Request being reviewed should pass all tests. + Each commit to a Pull Request will trigger a new test run. + Confirm the last test run passes all the tests. + + * Double check the change. + + Confirm the change has a single commit and includes a nice commit message. + * Merge the change with ASF master. - When merging the change, do not rebase. + When merging the change, do not <<rebase>> (we do not want to change the Apache commit history). Instead do a single merge commit into Apache VXQuery master. + Since the Pull Request now has a single commit, an alternative would be to cherry pick that commit + from the given branch into master. + +--- +git checkout master +git merge prestonc/vxquery_142 +git log +--- + + Review the log to confirm the history is correct. 
+ + + * Push change to ASF remote. + + Confirm the log is correct on your local master. + Push master to the ASF remote. diff --git a/src/site/apt/index.apt b/src/site/apt/index.apt index 3986f814a..919bf1f92 100644 --- a/src/site/apt/index.apt +++ b/src/site/apt/index.apt @@ -16,8 +16,9 @@ Apache VXQuery Apache VXQuery\x99 will be a standards compliant XML Query processor implemented in Java. - The focus is on the evaluation of queries on large amounts of XML data. - Specifically the goal is to evaluate queries on large collections of relatively small XML documents. + The XQuery processor supports the {{{http://www.jsoniq.org/}JSONiq}} extension to XQuery. + The focus is on the evaluation of queries on large amounts of JSON and XML data. + Specifically the goal is to evaluate queries on large collections of relatively small JSON or XML documents. To achieve this queries will be evaluated on a cluster of shared nothing machines. There are lots of large collections of relatively small documents like e.g. the {{{http://www.sec.gov/info/edgar/ednews/dissemin.htm}EDGAR dataset}} or the {{{http://wiki.openstreetmap.org/wiki/Download}OpenStreetMap dataset}}. 
diff --git a/src/site/resources/images/vxquery_stack.png b/src/site/resources/images/vxquery_stack.png index 01549a564..5a0efe57a 100644 Binary files a/src/site/resources/images/vxquery_stack.png and b/src/site/resources/images/vxquery_stack.png differ diff --git a/vxquery-cli/pom.xml b/vxquery-cli/pom.xml index 52c32ad9d..48bd63fa2 100644 --- a/vxquery-cli/pom.xml +++ b/vxquery-cli/pom.xml @@ -87,6 +87,16 @@ compile + + args4j + args4j + + + + commons-io + commons-io + + org.apache.hyracks hyracks-api @@ -122,19 +132,24 @@ hyracks-control-common + + org.apache.hyracks + hyracks-dataflow-common + + org.apache.hyracks hyracks-dataflow-std - org.apache.hyracks - hyracks-hdfs-core + org.apache.hyracks + hyracks-hdfs-core - org.apache.hyracks - hyracks-hdfs-2.x + org.apache.hyracks + hyracks-hdfs-2.x diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java index e0e38433e..dfb440783 100644 --- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java +++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java @@ -14,15 +14,17 @@ */ package org.apache.vxquery.cli; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.PrintWriter; import java.io.StringReader; -import java.net.InetAddress; +import java.net.Inet4Address; import java.nio.file.Files; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; import java.util.EnumSet; import java.util.HashMap; @@ -66,12 +68,13 @@ public class VXQuery { private final CmdLineOptions opts; + private final CmdLineOptions indexOpts; private ClusterControllerService cc; private NodeControllerService[] ncs; private IHyracksClientConnection hcc; private IHyracksDataset hds; - + private List collectionList; private ResultSetId resultSetId; private static List timingMessages = new ArrayList<>(); private static long 
sumTiming; @@ -87,6 +90,16 @@ public class VXQuery { */ public VXQuery(CmdLineOptions opts) { this.opts = opts; + // The index query returns only the result, without any other information. + this.indexOpts = opts; + indexOpts.showAST = false; + indexOpts.showOET = false; + indexOpts.showQuery = false; + indexOpts.showRP = false; + indexOpts.showTET = false; + indexOpts.timing = false; + indexOpts.compileOnly = false; + this.collectionList = new ArrayList(); } /** @@ -168,71 +181,87 @@ private void execute() throws Exception { * @throws SystemException * @throws Exception */ + private void runQueries() throws Exception { - Date start; - Date end; - for (String query : opts.arguments) { - String qStr = slurp(query); - if (opts.showQuery) { - System.err.println(qStr); + List queries = opts.arguments; + // Run the showIndexes query before executing any target query, to store the index metadata + List queriesIndex = new ArrayList(); + queriesIndex.add("vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/showIndexes.xq"); + OutputStream resultStream = new ByteArrayOutputStream(); + executeQuery(queriesIndex.get(0), 1, resultStream, indexOpts); + ByteArrayOutputStream bos = (ByteArrayOutputStream) resultStream; + String result = new String(bos.toByteArray()); + String[] collections = result.split("\n"); + this.collectionList = Arrays.asList(collections); + executeQueries(queries); + } + + public void executeQueries(List queries) throws Exception { + for (String query : queries) { + OutputStream resultStream = System.out; + if (opts.resultFile != null) { + resultStream = new FileOutputStream(new File(opts.resultFile)); } + executeQuery(query, opts.repeatExec, resultStream, opts); + } + } - VXQueryCompilationListener listener = new VXQueryCompilationListener(opts.showAST, opts.showTET, - opts.showOET, opts.showRP); + public void executeQuery(String query, int repeatedExecution, OutputStream resultStream, CmdLineOptions options) + throws Exception { + 
PrintWriter writer = new PrintWriter(resultStream, true); + String qStr = slurp(query); + if (opts.showQuery) { + writer.println(qStr); + } + VXQueryCompilationListener listener = new VXQueryCompilationListener(opts.showAST, opts.showTET, opts.showOET, + opts.showRP); - start = opts.timing ? new Date() : null; + Date start = opts.timing ? new Date() : null; - Map nodeControllerInfos = null; - if (hcc != null) { - nodeControllerInfos = hcc.getNodeControllerInfos(); - } - XMLQueryCompiler compiler = new XMLQueryCompiler(listener, nodeControllerInfos, opts.frameSize, - opts.availableProcessors, opts.joinHashSize, opts.maximumDataSize, opts.hdfsConf); - resultSetId = createResultSetId(); - CompilerControlBlock ccb = new CompilerControlBlock(new StaticContextImpl(RootStaticContextImpl.INSTANCE), - resultSetId, null); - compiler.compile(query, new StringReader(qStr), ccb, opts.optimizationLevel); - // if -timing argument passed, show the starting and ending times - if (opts.timing) { - end = new Date(); - timingMessage("Compile time: " + (end.getTime() - start.getTime()) + " ms"); - } - if (opts.compileOnly) { - continue; - } - - Module module = compiler.getModule(); - JobSpecification js = module.getHyracksJobSpecification(); + Map nodeControllerInfos = null; + if (hcc != null) { + nodeControllerInfos = hcc.getNodeControllerInfos(); + } + XMLQueryCompiler compiler = new XMLQueryCompiler(listener, nodeControllerInfos, opts.frameSize, + opts.availableProcessors, opts.joinHashSize, opts.maximumDataSize, opts.hdfsConf); + resultSetId = createResultSetId(); + CompilerControlBlock ccb = new CompilerControlBlock(new StaticContextImpl(RootStaticContextImpl.INSTANCE), + resultSetId, null); + compiler.compile(query, new StringReader(qStr), ccb, opts.optimizationLevel, this.collectionList); + // if -timing argument passed, show the starting and ending times + Date end = opts.timing ? 
new Date() : null; + if (opts.timing) { + timingMessage("Compile time: " + (end.getTime() - start.getTime()) + " ms"); + } + if (opts.compileOnly) { + return; + } - DynamicContext dCtx = new DynamicContextImpl(module.getModuleContext()); - js.setGlobalJobDataFactory(new VXQueryGlobalDataFactory(dCtx.createFactory())); + Module module = compiler.getModule(); + JobSpecification js = module.getHyracksJobSpecification(); - OutputStream resultStream = System.out; - if (opts.resultFile != null) { - resultStream = new FileOutputStream(new File(opts.resultFile)); - } + DynamicContext dCtx = new DynamicContextImpl(module.getModuleContext()); + js.setGlobalJobDataFactory(new VXQueryGlobalDataFactory(dCtx.createFactory())); - PrintWriter writer = new PrintWriter(resultStream, true); - // Repeat execution for number of times provided in -repeatexec argument - for (int i = 0; i < opts.repeatExec; ++i) { - start = opts.timing ? new Date() : null; - runJob(js, writer); - // if -timing argument passed, show the starting and ending times - if (opts.timing) { - end = new Date(); - long currentRun = end.getTime() - start.getTime(); - if ((i + 1) > opts.timingIgnoreQueries) { - sumTiming += currentRun; - sumSquaredTiming += currentRun * currentRun; - if (currentRun < minTiming) { - minTiming = currentRun; - } - if (maxTiming < currentRun) { - maxTiming = currentRun; - } + // Repeat execution for number of times provided in -repeatexec argument + for (int i = 0; i < repeatedExecution; ++i) { + start = opts.timing ? 
new Date() : null; + runJob(js, writer); + // if -timing argument passed, show the starting and ending times + if (opts.timing) { + end = new Date(); + long currentRun = end.getTime() - start.getTime(); + if ((i + 1) > opts.timingIgnoreQueries) { + sumTiming += currentRun; + sumSquaredTiming += currentRun * currentRun; + if (currentRun < minTiming) { + minTiming = currentRun; + } + if (maxTiming < currentRun) { + maxTiming = currentRun; } - timingMessage("Job (" + (i + 1) + ") execution time: " + currentRun + " ms"); } + timingMessage("Job (" + (i + 1) + ") execution time: " + currentRun + " ms"); } } } @@ -285,33 +314,32 @@ protected ResultSetId createResultSetId() { * @throws Exception */ public void startLocalHyracks() throws Exception { - String localAddress = InetAddress.getLocalHost().getHostAddress(); + String localAddress = Inet4Address.getLoopbackAddress().getHostAddress(); CCConfig ccConfig = new CCConfig(); - ccConfig.clientNetIpAddress = localAddress; - ccConfig.clientNetPort = 39000; - ccConfig.clusterNetIpAddress = localAddress; - ccConfig.clusterNetPort = 39001; - ccConfig.httpPort = 39002; - ccConfig.profileDumpPeriod = 10000; + ccConfig.setClientListenAddress(localAddress); + ccConfig.setClientListenPort(39000); + ccConfig.setClusterListenAddress(localAddress); + ccConfig.setClusterListenPort(39001); + ccConfig.setConsoleListenPort(39002); + ccConfig.setProfileDumpPeriod(10000); cc = new ClusterControllerService(ccConfig); cc.start(); ncs = new NodeControllerService[opts.localNodeControllers]; for (int i = 0; i < ncs.length; i++) { - NCConfig ncConfig = new NCConfig(); - ncConfig.ccHost = "localhost"; - ncConfig.ccPort = 39001; - ncConfig.clusterNetIPAddress = localAddress; - ncConfig.dataIPAddress = localAddress; - ncConfig.resultIPAddress = localAddress; - ncConfig.nodeId = "nc" + (i + 1); + NCConfig ncConfig = new NCConfig("nc" + (i + 1)); + ncConfig.setClusterAddress("localhost"); + ncConfig.setClusterPort(39001); + 
ncConfig.setClusterListenAddress(localAddress); + ncConfig.setDataListenAddress(localAddress); + ncConfig.setResultListenAddress(localAddress); //TODO: enable index folder as a cli option for on-the-fly indexing queries - ncConfig.ioDevices = Files.createTempDirectory(ncConfig.nodeId).toString(); + ncConfig.setIODevices(new String[] { Files.createTempDirectory(ncConfig.getNodeId()).toString() }); ncs[i] = new NodeControllerService(ncConfig); ncs[i].start(); } - hcc = new HyracksConnection(ccConfig.clientNetIpAddress, ccConfig.clientNetPort); + hcc = new HyracksConnection(ccConfig.getClientListenAddress(), ccConfig.getClientListenPort()); } /** diff --git a/vxquery-core/pom.xml b/vxquery-core/pom.xml index 38908d7e9..67d45e36e 100644 --- a/vxquery-core/pom.xml +++ b/vxquery-core/pom.xml @@ -175,6 +175,21 @@ commons-lang3 + + org.apache.hyracks + algebricks-common + + + + org.apache.hyracks + algebricks-core + + + + org.apache.hyracks + algebricks-runtime + + org.apache.hyracks algebricks-compiler @@ -195,6 +210,11 @@ hyracks-api + + org.apache.hyracks + hyracks-util + + org.apache.hyracks hyracks-data-std @@ -261,6 +281,11 @@ jackson-core + + com.fasterxml.jackson.core + jackson-databind + + commons-io commons-io @@ -271,12 +296,42 @@ commons-codec + + commons-lang + commons-lang + + + + com.google.guava + guava + + junit junit test + + javax.xml.bind + jaxb-api + + + + xml-apis + xml-apis + + + + stax + stax-api + + + + log4j + log4j + + lucene-core org.apache.lucene diff --git a/vxquery-core/src/main/java/org/apache/vxquery/common/VXQueryCommons.java b/vxquery-core/src/main/java/org/apache/vxquery/common/VXQueryCommons.java index ceaf3c79d..400fb157a 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/common/VXQueryCommons.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/common/VXQueryCommons.java @@ -35,7 +35,8 @@ public class VXQueryCommons { static { indexingFunctions.add(BuiltinFunctions.FN_BUILD_INDEX_ON_COLLECTION_1.getFunctionIdentifier()); - 
indexingFunctions.add(BuiltinFunctions.FN_COLLECTION_FROM_INDEX_2.getFunctionIdentifier()); + indexingFunctions.add(BuiltinFunctions.FN_COLLECTION_1.getFunctionIdentifier()); + indexingFunctions.add(BuiltinFunctions.FN_COLLECTION_FROM_INDEX_1.getFunctionIdentifier()); indexingFunctions.add(BuiltinFunctions.FN_DELETE_INDEX_1.getFunctionIdentifier()); indexingFunctions.add(BuiltinFunctions.FN_UPDATE_INDEX_1.getFunctionIdentifier()); } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java index b7196cfc1..be76a22e9 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/algebricks/VXQueryComparatorFactoryProvider.java @@ -30,6 +30,12 @@ public IBinaryComparatorFactory getBinaryComparatorFactory(Object type, boolean return new BinaryComparatorFactory(type, ascending); } + @Override + public IBinaryComparatorFactory getBinaryComparatorFactory(Object type, boolean ascending, boolean ignoreCase) + throws AlgebricksException { + throw new UnsupportedOperationException(); + } + private static class BinaryComparatorFactory implements IBinaryComparatorFactory { private static final long serialVersionUID = 1L; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceCollectionRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceCollectionRule.java index 20283d836..11d379580 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceCollectionRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceCollectionRule.java @@ -16,12 +16,16 @@ */ package org.apache.vxquery.compiler.rewriter.rules; +import java.util.ArrayList; + import 
org.apache.commons.lang3.mutable.Mutable; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; import org.apache.vxquery.common.VXQueryCommons; import org.apache.vxquery.compiler.rewriter.VXQueryOptimizationContext; import org.apache.vxquery.metadata.VXQueryCollectionDataSource; +import org.apache.vxquery.metadata.VXQueryIndexingDataSource; +import org.apache.vxquery.metadata.VXQueryMetadataProvider; import org.apache.vxquery.types.AnyItemType; import org.apache.vxquery.types.Quantifier; import org.apache.vxquery.types.SequenceType; @@ -61,11 +65,35 @@ public class IntroduceCollectionRule extends AbstractCollectionRule { public boolean rewritePre(Mutable opRef, IOptimizationContext context) { VXQueryOptimizationContext vxqueryContext = (VXQueryOptimizationContext) context; String[] args = getFunctionalArguments(opRef, VXQueryCommons.collectionFunctions); - + VXQueryMetadataProvider metadata = (VXQueryMetadataProvider) context.getMetadataProvider(); if (args != null) { String collectionName = args[0]; // Build the new operator and update the query plan. 
int collectionId = vxqueryContext.newCollectionId(); + ArrayList collectionTempName = new ArrayList(); + collectionTempName.add(collectionName); + if (collectionName.contains("|")) { + collectionTempName.remove(0); + int index = collectionName.indexOf("|"); + int start = 0; + while (index >= 0) { + collectionTempName.add(collectionName.substring(start, index)); + start = index + 1; + index = collectionName.indexOf("|", index + 1); + if (index == -1) { + collectionTempName.add(collectionName.substring(start)); + } + } + } + if (metadata.hasIndex(collectionTempName)) { + VXQueryIndexingDataSource ids = VXQueryIndexingDataSource.create(collectionId, collectionName, + SequenceType.create(AnyItemType.INSTANCE, Quantifier.QUANT_STAR), + functionCall.getFunctionIdentifier().getName()); + if (ids != null) { + ids.setTotalDataSources(vxqueryContext.getTotalDataSources()); + return setDataSourceScan(ids, opRef); + } + } VXQueryCollectionDataSource ds = VXQueryCollectionDataSource.create(collectionId, collectionName, SequenceType.create(AnyItemType.INSTANCE, Quantifier.QUANT_STAR)); if (ds != null) { diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceIndexingRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceIndexingRule.java index 5b961317a..6e60d7584 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceIndexingRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceIndexingRule.java @@ -33,19 +33,19 @@ public class IntroduceIndexingRule extends AbstractCollectionRule { @Override - public boolean rewritePre(Mutable opRef, IOptimizationContext context) throws AlgebricksException { + public boolean rewritePre(Mutable opRef, IOptimizationContext context) + throws AlgebricksException { VXQueryOptimizationContext vxqueryContext = (VXQueryOptimizationContext) context; String args[] = getFunctionalArguments(opRef, 
VXQueryCommons.indexingFunctions); if (args != null) { String collection = args[0]; - String elementPath = args.length > 1?args[1]:null; - // Build the new operator and update the query plan. int collectionId = vxqueryContext.newCollectionId(); - VXQueryIndexingDataSource ids = VXQueryIndexingDataSource.create(collectionId, collection, elementPath, - SequenceType.create(AnyItemType.INSTANCE, Quantifier.QUANT_STAR), functionCall.getFunctionIdentifier().getName()); + VXQueryIndexingDataSource ids = VXQueryIndexingDataSource.create(collectionId, collection, + SequenceType.create(AnyItemType.INSTANCE, Quantifier.QUANT_STAR), + functionCall.getFunctionIdentifier().getName()); if (ids != null) { ids.setTotalDataSources(vxqueryContext.getTotalDataSources()); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java index 806b53292..df76805a4 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java @@ -20,9 +20,6 @@ import java.util.Map; import org.apache.commons.lang3.mutable.Mutable; -import org.apache.vxquery.functions.BuiltinFunctions; -import org.apache.vxquery.functions.BuiltinOperators; - import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.common.utils.Pair; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; @@ -37,6 +34,8 @@ import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator; import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; +import org.apache.vxquery.functions.BuiltinFunctions; +import 
org.apache.vxquery.functions.BuiltinOperators; /** * The rule searches for aggregate operators with an aggregate function @@ -107,9 +106,11 @@ public boolean rewritePre(Mutable opRef, IOptimizationContext if (aggregateFunctionCall.isTwoStep()) { return false; } - aggregateFunctionCall.setTwoStep(true); + aggregateFunctionCall = new AggregateFunctionCallExpression(aggregateFunctionCall.getFunctionInfo(), true, + aggregateFunctionCall.getArguments()); aggregateFunctionCall.setStepOneAggregate(AGGREGATE_MAP.get(functionCall.getFunctionIdentifier()).first); aggregateFunctionCall.setStepTwoAggregate(AGGREGATE_MAP.get(functionCall.getFunctionIdentifier()).second); + aggregate.getExpressions().get(0).setValue(aggregateFunctionCall); return true; } return false; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java index dbcce54c6..2773154eb 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java @@ -55,12 +55,9 @@ public class PushChildIntoDataScanRule extends AbstractPushExpressionIntoDatasca @Override boolean updateDataSource(IVXQueryDataSource datasource, Mutable expression) { - //TODO: indexing needs to be extended to support push child into datascan - if (datasource.usingIndex()) { - return false; - } - boolean added = false; List> finds = new ArrayList>(); + boolean added = false; + ExpressionToolbox.findAllFunctionExpressions(expression, BuiltinOperators.CHILD.getFunctionIdentifier(), finds); for (int i = finds.size(); i > 0; --i) { int typeId = ExpressionToolbox.getTypeExpressionTypeArgument(finds.get(i - 1)); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushValueIntoDatascanRule.java 
b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushValueIntoDatascanRule.java index 1d8a55d0b..b9014690d 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushValueIntoDatascanRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushValueIntoDatascanRule.java @@ -59,6 +59,9 @@ public class PushValueIntoDatascanRule extends AbstractPushExpressionIntoDatasca @Override boolean updateDataSource(IVXQueryDataSource datasource, Mutable expression) { + if (datasource.usingIndex()) { + return false; + } VXQueryCollectionDataSource ds = (VXQueryCollectionDataSource) datasource; boolean added = false; List> finds = new ArrayList>(); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/context/DynamicContextImpl.java b/vxquery-core/src/main/java/org/apache/vxquery/context/DynamicContextImpl.java index 6a2a93059..7805c7213 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/context/DynamicContextImpl.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/context/DynamicContextImpl.java @@ -24,6 +24,7 @@ import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.api.IValueReference; import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.apache.vxquery.datamodel.accessors.atomic.XSDateTimePointable; public class DynamicContextImpl implements DynamicContext { private StaticContext sCtx; @@ -56,6 +57,14 @@ public void setCurrentDateTime(IValueReference value) { @Override public void getCurrentDateTime(IPointable value) { + if (currentDateTime == null) { + // if not set, get it from the JVM + final int dtLen = XSDateTimePointable.TYPE_TRAITS.getFixedLength(); + currentDateTime = new byte[dtLen]; + XSDateTimePointable datetimep = new XSDateTimePointable(); + datetimep.set(currentDateTime, 0, dtLen); + datetimep.setCurrentDateTime(); + } value.set(currentDateTime, 0, currentDateTime.length); } diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/context/DynamicContextImplFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/context/DynamicContextImplFactory.java index a30c5adaa..c00ffc15c 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/context/DynamicContextImplFactory.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/context/DynamicContextImplFactory.java @@ -69,12 +69,11 @@ public DynamicContext createDynamicContext(IHyracksJobletContext ctx) { static IDynamicContextFactory createInstance(DynamicContextImpl dCtx) { IStaticContextFactory scFactory = dCtx.getStaticContext().createFactory(); - final int dtLen = XSDateTimePointable.TYPE_TRAITS.getFixedLength(); - byte[] currentDateTime = new byte[dtLen]; - XSDateTimePointable datetimep = new XSDateTimePointable(); - datetimep.set(currentDateTime, 0, dtLen); - datetimep.setCurrentDateTime(); - + VoidPointable vp = new VoidPointable(); + dCtx.getCurrentDateTime(vp); + byte[] currentDateTime = new byte[vp.getLength()]; + vp.copyInto(currentDateTime); + Map vMap = dCtx.getVariableMap(); int nVars = vMap.size(); QName[] variableNames = new QName[nVars]; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/accessors/atomic/XSDateTimePointable.java b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/accessors/atomic/XSDateTimePointable.java index 0f3a44a2e..40cf55b66 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/accessors/atomic/XSDateTimePointable.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/accessors/atomic/XSDateTimePointable.java @@ -76,7 +76,10 @@ public ITypeTraits getTypeTraits() { }; public void setCurrentDateTime() { - Calendar cal = Calendar.getInstance(); + setCurrentDateTime(Calendar.getInstance()); + } + + public void setCurrentDateTime(Calendar cal) { TimeZone tz = cal.getTimeZone(); int year = cal.get(Calendar.YEAR); int month = cal.get(Calendar.MONTH); diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml index d64f42398..4932a781e 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml +++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml @@ -152,7 +152,6 @@ - @@ -166,7 +165,7 @@ - + diff --git a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java index 7524da4a2..1df31dd78 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java @@ -22,7 +22,6 @@ import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.primitive.BooleanPointable; import org.apache.hyracks.data.std.primitive.BytePointable; import org.apache.hyracks.data.std.primitive.DoublePointable; @@ -56,12 +55,10 @@ import org.apache.vxquery.datamodel.accessors.nodes.TextOrCommentNodePointable; import org.apache.vxquery.datamodel.values.ValueTag; import org.apache.vxquery.runtime.functions.cast.CastToStringOperation; -import org.apache.vxquery.runtime.functions.index.updateIndex.Constants; +import org.apache.vxquery.runtime.functions.index.update.Constants; import org.apache.vxquery.serializer.XMLSerializer; public class IndexDocumentBuilder extends XMLSerializer { - private final IPointable treePointable; - private final PointablePool pp; private NodeTreePointable ntp; @@ -88,16 +85,11 @@ public ComplexItem(StringField sfin, String idin) { } //TODO: Handle Processing Instructions, PrefixedNames, and Namepsace entries - public IndexDocumentBuilder(IPointable tree, IndexWriter inWriter, String file) { - this.treePointable = tree; + public 
IndexDocumentBuilder(TaggedValuePointable tvp, IndexWriter inWriter, String file) { writer = inWriter; this.filePath = file; - //convert to tagged value pointable - TaggedValuePointable tvp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable(); - tvp.set(treePointable.getByteArray(), 0, treePointable.getLength()); - //get bytes and info from doc pointer bstart = tvp.getByteArray(); sstart = tvp.getStartOffset(); @@ -105,7 +97,7 @@ public IndexDocumentBuilder(IPointable tree, IndexWriter inWriter, String file) doc = new Document(); - results = new ArrayList(); + results = new ArrayList<>(); pp = PointablePoolFactory.INSTANCE.createPointablePool(); } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java index 623b48cf5..005209dba 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java @@ -23,7 +23,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; -import java.net.InetAddress; +import java.net.Inet4Address; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -59,7 +59,6 @@ import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; import org.apache.hyracks.dataflow.common.comm.io.FrameFixedFieldTupleAppender; import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor; -import org.apache.hyracks.dataflow.common.comm.util.FrameUtils; import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor; import org.apache.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable; import org.apache.hyracks.hdfs.ContextFactory; @@ -93,7 +92,7 @@ public VXQueryCollectionOperatorDescriptor(IOperatorDescriptorRegistry 
spec, Abs totalDataSources = (short) ds.getTotalDataSources(); childSeq = ds.getChildSeq(); valueSeq = ds.getValueSeq(); - recordDescriptors[0] = rDesc; + outRecDescs[0] = rDesc; this.tag = ds.getTag(); this.hdfsConf = hdfsConf; this.nodeControllerInfos = nodeControllerInfos; @@ -109,7 +108,7 @@ public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, final IFrameFieldAppender appender = new FrameFixedFieldTupleAppender(fieldOutputCount); final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition(); final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId, totalDataSources); - final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId(); + final String nodeId = ctx.getJobletContext().getServiceContext().getNodeId(); final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData(); final ArrayBackedValueStorage jsonAbvs = new ArrayBackedValueStorage(); final String collectionName = collectionPartitions[partition % collectionPartitions.length]; @@ -129,38 +128,14 @@ public void open() throws HyracksDataException { public void nextFrame(ByteBuffer buffer) throws HyracksDataException { fta.reset(buffer); String collectionModifiedName = collectionName.replace("${nodeId}", nodeId); - Reader input; + if (!collectionModifiedName.contains("hdfs:/")) { File collectionDirectory = new File(collectionModifiedName); // check if directory is in the local file system if (collectionDirectory.exists()) { // Go through each tuple. 
if (collectionDirectory.isDirectory()) { - for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) { - Iterator it = FileUtils.iterateFiles(collectionDirectory, - new VXQueryIOFileFilter(), TrueFileFilter.INSTANCE); - while (it.hasNext()) { - File file = it.next(); - String fileName = file.getName().toLowerCase(); - if (fileName.endsWith(".xml")) { - if (LOGGER.isLoggable(Level.FINE)) { - LOGGER.fine("Starting to read XML document: " + file.getAbsolutePath()); - } - parser.parseElements(file, writer, tupleIndex); - } else if (fileName.endsWith(".json")) { - if (LOGGER.isLoggable(Level.FINE)) { - LOGGER.fine("Starting to read JSON document: " + file.getAbsolutePath()); - } - try { - jsonAbvs.reset(); - input = new InputStreamReader(new FileInputStream(file)); - jparser.parse(input, jsonAbvs, writer, appender); - } catch (FileNotFoundException e) { - throw new HyracksDataException(e.toString()); - } - } - } - } + xmlAndJsonCollection(collectionDirectory); } else { throw new HyracksDataException("Invalid directory parameter (" + nodeId + ":" + collectionDirectory.getAbsolutePath() + ") passed to collection."); @@ -182,7 +157,7 @@ public void nextFrame(ByteBuffer buffer) throws HyracksDataException { try { hdfs.scheduleSplits(); ArrayList schedule = hdfs - .getScheduleForNode(InetAddress.getLocalHost().getHostAddress()); + .getScheduleForNode(Inet4Address.getLoopbackAddress().getHostAddress()); List splits = hdfs.getSplits(); List fileSplits = new ArrayList<>(); for (int i : schedule) { @@ -272,6 +247,35 @@ public void nextFrame(ByteBuffer buffer) throws HyracksDataException { } } + public void xmlAndJsonCollection(File directory) throws HyracksDataException { + Reader input; + for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) { + Iterator it = FileUtils.iterateFiles(directory, new VXQueryIOFileFilter(), + TrueFileFilter.INSTANCE); + while (it.hasNext()) { + File file = it.next(); + String fileName = file.getName().toLowerCase(); 
+ if (fileName.endsWith(".xml")) { + if (LOGGER.isLoggable(Level.FINE)) { + LOGGER.fine("Starting to read XML document: " + file.getAbsolutePath()); + } + parser.parseElements(file, writer, tupleIndex); + } else if (fileName.endsWith(".json")) { + if (LOGGER.isLoggable(Level.FINE)) { + LOGGER.fine("Starting to read JSON document: " + file.getAbsolutePath()); + } + try { + jsonAbvs.reset(); + input = new InputStreamReader(new FileInputStream(file)); + jparser.parse(input, jsonAbvs, writer, appender); + } catch (FileNotFoundException e) { + throw new HyracksDataException(e.toString()); + } + } + } + } + } + @Override public void fail() throws HyracksDataException { writer.fail(); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingDataSource.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingDataSource.java index ea69cfd0b..d55530dcf 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingDataSource.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingDataSource.java @@ -31,14 +31,11 @@ */ public class VXQueryIndexingDataSource extends AbstractVXQueryDataSource { - private String elementPath; private String function; - private VXQueryIndexingDataSource(int id, String collection, String elementPath, Object[] types, - String functionCall) { + private VXQueryIndexingDataSource(int id, String collection, Object[] types, String functionCall) { this.dataSourceId = id; this.collectionName = collection; - this.elementPath = elementPath; this.function = functionCall; this.collectionPartitions = collectionName.split(DELIMITER); this.types = types; @@ -56,13 +53,8 @@ public IPhysicalPropertiesVector computePropertiesVector(List s this.valueSeq = new ArrayList<>(); } - public static VXQueryIndexingDataSource create(int id, String collection, String index, Object type, - String function) { - return new VXQueryIndexingDataSource(id, collection, index, new Object[] { type 
}, function); - } - - public String getElementPath() { - return elementPath; + public static VXQueryIndexingDataSource create(int id, String collection, Object type, String function) { + return new VXQueryIndexingDataSource(id, collection, new Object[] { type }, function); } public String getFunctionCall() { @@ -71,7 +63,7 @@ public String getFunctionCall() { @Override public String toString() { - return "VXQueryIndexingDataSource [collectionName=" + collectionName + ", elementPath=" + elementPath + return "VXQueryIndexingDataSource [collectionName=" + collectionName + ", elementPath=" + this.childSeq + ", function=" + function + "]"; } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingOperatorDescriptor.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingOperatorDescriptor.java index ac92a0eed..929a6fc8e 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingOperatorDescriptor.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryIndexingOperatorDescriptor.java @@ -16,15 +16,13 @@ */ package org.apache.vxquery.metadata; -import java.io.DataInputStream; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.Map; +import java.util.List; import java.util.logging.Logger; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; -import org.apache.hyracks.api.client.NodeControllerInfo; import org.apache.hyracks.api.comm.IFrame; import org.apache.hyracks.api.comm.IFrameFieldAppender; import org.apache.hyracks.api.comm.VSizeFrame; @@ -35,23 +33,20 @@ import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.job.IOperatorDescriptorRegistry; import org.apache.hyracks.data.std.api.IPointable; -import org.apache.hyracks.data.std.primitive.UTF8StringPointable; import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; import 
org.apache.hyracks.dataflow.common.comm.io.FrameFixedFieldTupleAppender; import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor; -import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream; import org.apache.hyracks.dataflow.common.comm.util.FrameUtils; import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor; import org.apache.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable; import org.apache.vxquery.datamodel.accessors.TaggedValuePointable; -import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder; import org.apache.vxquery.datamodel.values.XDMConstants; import org.apache.vxquery.exceptions.SystemException; import org.apache.vxquery.functions.BuiltinFunctions; import org.apache.vxquery.runtime.functions.index.IndexConstructorUtil; import org.apache.vxquery.runtime.functions.index.VXQueryIndexReader; -import org.apache.vxquery.runtime.functions.index.indexCentralizer.IndexCentralizerUtil; -import org.apache.vxquery.runtime.functions.index.updateIndex.IndexUpdater; +import org.apache.vxquery.runtime.functions.index.centralizer.IndexCentralizerUtil; +import org.apache.vxquery.runtime.functions.index.update.IndexUpdater; import org.apache.vxquery.xmlparser.ITreeNodeIdProvider; import org.apache.vxquery.xmlparser.TreeNodeIdProvider; @@ -61,18 +56,18 @@ public class VXQueryIndexingOperatorDescriptor extends AbstractSingleActivityOpe private short dataSourceId; private short totalDataSources; private String[] collectionPartitions; - private String elementPath; private final String functionCall; + private List childSeq; public VXQueryIndexingOperatorDescriptor(IOperatorDescriptorRegistry spec, VXQueryIndexingDataSource ds, - RecordDescriptor rDesc, String hdfsConf, Map nodeControllerInfos) { + RecordDescriptor rDesc) { super(spec, 1, 1); this.functionCall = ds.getFunctionCall(); collectionPartitions = ds.getPartitions(); dataSourceId = (short) ds.getDataSourceId(); 
totalDataSources = (short) ds.getTotalDataSources(); - recordDescriptors[0] = rDesc; - this.elementPath = ds.getElementPath(); + outRecDescs[0] = rDesc; + childSeq = ds.getChildSeq(); } @Override @@ -85,12 +80,13 @@ public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, final IFrameFieldAppender appender = new FrameFixedFieldTupleAppender(fieldOutputCount); final short partitionId = (short) ctx.getTaskAttemptId().getTaskId().getPartition(); final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider(partitionId, dataSourceId, totalDataSources); - final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId(); + final String nodeId = ctx.getJobletContext().getServiceContext().getNodeId(); final String collectionName = collectionPartitions[partition % collectionPartitions.length]; - String collectionModifiedName = collectionName.replace("${nodeId}", nodeId); + final String collectionModifiedName = collectionName.replace("${nodeId}", nodeId); IndexCentralizerUtil indexCentralizerUtil = new IndexCentralizerUtil( - ctx.getIOManager().getIODevices().get(0).getMount()); + ctx.getIoManager().getIODevices().get(0).getMount()); indexCentralizerUtil.readIndexDirectory(); + final IPointable result = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable(); return new AbstractUnaryInputUnaryOutputOperatorNodePushable() { @Override @@ -103,100 +99,115 @@ public void open() throws HyracksDataException { public void nextFrame(ByteBuffer buffer) throws HyracksDataException { fta.reset(buffer); - IPointable result = new TaggedValuePointable(); - - final UTF8StringPointable stringp = (UTF8StringPointable) UTF8StringPointable.FACTORY.createPointable(); - final TaggedValuePointable nodep = (TaggedValuePointable) TaggedValuePointable.FACTORY - .createPointable(); - - final ByteBufferInputStream bbis = new ByteBufferInputStream(); - final DataInputStream di = new DataInputStream(bbis); - final SequenceBuilder sb = new 
SequenceBuilder(); final ArrayBackedValueStorage abvs = new ArrayBackedValueStorage(); final ArrayBackedValueStorage abvsFileNode = new ArrayBackedValueStorage(); - String indexModifiedName; + abvs.reset(); + abvsFileNode.reset(); + if (collectionModifiedName.contains("hdfs://")) { throw new HyracksDataException("Indexing support for HDFS not yet implemented."); } else { if (functionCall.equals( BuiltinFunctions.FN_BUILD_INDEX_ON_COLLECTION_1.getFunctionIdentifier().getName())) { - indexModifiedName = indexCentralizerUtil.putIndexForCollection(collectionModifiedName); - File collectionDirectory = new File(collectionModifiedName); - - //check if directory is in the local file system - if (collectionDirectory.exists() && collectionDirectory.isDirectory()) { - IndexConstructorUtil indexConstructorUtil = new IndexConstructorUtil(); - try { - indexConstructorUtil.evaluate(collectionModifiedName, indexModifiedName, result, - stringp, bbis, di, sb, abvs, nodeIdProvider, abvsFileNode, nodep, false, - nodeId); - XDMConstants.setTrue(result); - FrameUtils.appendFieldToWriter(writer, appender, result.getByteArray(), - result.getStartOffset(), result.getLength()); - } catch (SystemException e) { - throw new HyracksDataException("Could not create index for collection: " - + collectionName + " in dir: " + indexModifiedName + " " + e.getMessage()); - } - } else { - throw new HyracksDataException("Cannot find Collection Directory (" + nodeId + ":" - + collectionDirectory.getAbsolutePath() + ")"); - } - } else if (functionCall - .equals(BuiltinFunctions.FN_UPDATE_INDEX_1.getFunctionIdentifier().getName())) { - indexModifiedName = indexCentralizerUtil.getIndexForCollection(collectionModifiedName); - IndexUpdater updater = new IndexUpdater(indexModifiedName, result, stringp, bbis, di, sb, abvs, - nodeIdProvider, abvsFileNode, nodep, nodeId); try { - updater.setup(); - updater.updateIndex(); - updater.updateMetadataFile(); - updater.exit(); - XDMConstants.setTrue(result); - 
FrameUtils.appendFieldToWriter(writer, appender, result.getByteArray(), - result.getStartOffset(), result.getLength()); + createIndex(result, abvs, abvsFileNode); } catch (IOException e) { - throw new HyracksDataException( - "Could not update index in " + indexModifiedName + " " + e.getMessage()); + throw new HyracksDataException(e); } + } else if (functionCall + .equals(BuiltinFunctions.FN_UPDATE_INDEX_1.getFunctionIdentifier().getName())) { + updateIndex(result, abvs, abvsFileNode); } else if (functionCall .equals(BuiltinFunctions.FN_DELETE_INDEX_1.getFunctionIdentifier().getName())) { - indexModifiedName = indexCentralizerUtil.getIndexForCollection(collectionModifiedName); - IndexUpdater updater = new IndexUpdater(indexModifiedName, result, stringp, bbis, di, sb, abvs, - nodeIdProvider, abvsFileNode, nodep, nodeId); - indexCentralizerUtil.deleteEntryForCollection(collectionModifiedName); - try { - updater.setup(); - updater.deleteAllIndexes(); - XDMConstants.setTrue(result); - FrameUtils.appendFieldToWriter(writer, appender, result.getByteArray(), - result.getStartOffset(), result.getLength()); - } catch (IOException e) { - throw new HyracksDataException( - "Could not delete index in " + indexModifiedName + " " + e.getMessage()); - } - + deleteIndex(result, abvs, abvsFileNode); } else if (functionCall - .equals(BuiltinFunctions.FN_COLLECTION_FROM_INDEX_2.getFunctionIdentifier().getName())) { - indexModifiedName = indexCentralizerUtil.getIndexForCollection(collectionModifiedName); - VXQueryIndexReader indexReader = new VXQueryIndexReader(ctx, indexModifiedName, elementPath); - try { - indexReader.init(); - while (indexReader.step(result)) { - FrameUtils.appendFieldToWriter(writer, appender, result.getByteArray(), - result.getStartOffset(), result.getLength()); - } - } catch (AlgebricksException e) { - throw new HyracksDataException("Could not read index."); - } - + .equals(BuiltinFunctions.FN_COLLECTION_FROM_INDEX_1.getFunctionIdentifier().getName()) + || 
functionCall + .equals(BuiltinFunctions.FN_COLLECTION_1.getFunctionIdentifier().getName())) { + usingIndex(result); } else { throw new HyracksDataException("Unsupported function call (" + functionCall + ")"); } } } + public void createIndex(IPointable result, ArrayBackedValueStorage abvs, + ArrayBackedValueStorage abvsFileNode) throws IOException { + String indexModifiedName = indexCentralizerUtil.putIndexForCollection(collectionModifiedName); + File collectionDirectory = new File(collectionModifiedName); + + //check if directory is in the local file system + if (collectionDirectory.exists() && collectionDirectory.isDirectory()) { + IndexConstructorUtil indexConstructorUtil = new IndexConstructorUtil(); + try { + indexConstructorUtil.evaluate(collectionModifiedName, indexModifiedName, result, abvs, + nodeIdProvider, abvsFileNode, false, nodeId); + XDMConstants.setTrue(result); + FrameUtils.appendFieldToWriter(writer, appender, result.getByteArray(), result.getStartOffset(), + result.getLength()); + } catch (SystemException e) { + throw new HyracksDataException("Could not create index for collection: " + collectionName + + " in dir: " + indexModifiedName + " " + e.getMessage(), e); + } + } else { + throw new HyracksDataException("Cannot find Collection Directory (" + nodeId + ":" + + collectionDirectory.getAbsolutePath() + ")"); + } + } + + public void updateIndex(IPointable result, ArrayBackedValueStorage abvs, + ArrayBackedValueStorage abvsFileNode) throws HyracksDataException { + String indexModifiedName = indexCentralizerUtil.getIndexForCollection(collectionModifiedName); + IndexUpdater updater = new IndexUpdater(indexModifiedName, result, abvs, nodeIdProvider, abvsFileNode, + nodeId); + try { + updater.setup(); + updater.updateIndex(); + updater.updateMetadataFile(); + updater.exit(); + XDMConstants.setTrue(result); + FrameUtils.appendFieldToWriter(writer, appender, result.getByteArray(), result.getStartOffset(), + result.getLength()); + } catch (IOException e) 
{ + throw new HyracksDataException( + "Could not update index in " + indexModifiedName + " " + e.getMessage(), e); + } + } + + public void deleteIndex(IPointable result, ArrayBackedValueStorage abvs, + ArrayBackedValueStorage abvsFileNode) throws HyracksDataException { + String indexModifiedName = indexCentralizerUtil.getIndexForCollection(collectionModifiedName); + IndexUpdater updater = new IndexUpdater(indexModifiedName, result, abvs, nodeIdProvider, abvsFileNode, + nodeId); + indexCentralizerUtil.deleteEntryForCollection(collectionModifiedName); + try { + updater.setup(); + updater.deleteAllIndexes(); + XDMConstants.setTrue(result); + FrameUtils.appendFieldToWriter(writer, appender, result.getByteArray(), result.getStartOffset(), + result.getLength()); + } catch (IOException e) { + throw new HyracksDataException( + "Could not delete index in " + indexModifiedName + " " + e.getMessage(), e); + } + } + + public void usingIndex(IPointable result) throws HyracksDataException { + String indexModifiedName = indexCentralizerUtil.getIndexForCollection(collectionModifiedName); + VXQueryIndexReader indexReader = new VXQueryIndexReader(ctx, indexModifiedName, childSeq, appender); + try { + indexReader.init(); + for (int tupleIndex = 0; tupleIndex < fta.getTupleCount(); ++tupleIndex) { + while (indexReader.step(result, writer, tupleIndex)) { + } + } + } catch (AlgebricksException e) { + throw new HyracksDataException("Could not read index.", e); + } + } + @Override public void fail() throws HyracksDataException { writer.fail(); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java index f6644d6f6..5bb9d1adf 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryMetadataProvider.java @@ -59,15 +59,17 @@ public class VXQueryMetadataProvider 
implements IMetadataProvider sourceFileMap; private final StaticContext staticCtx; private final String hdfsConf; + private final List collections; private final Map nodeControllerInfos; public VXQueryMetadataProvider(String[] nodeList, Map sourceFileMap, StaticContext staticCtx, - String hdfsConf, Map nodeControllerInfos) { + String hdfsConf, Map nodeControllerInfos, List collections) { this.nodeList = nodeList; this.sourceFileMap = sourceFileMap; this.staticCtx = staticCtx; this.hdfsConf = hdfsConf; this.nodeControllerInfos = nodeControllerInfos; + this.collections = collections; } @Override @@ -111,8 +113,7 @@ public Pair getScannerRuntim constraint = getClusterLocations(nodeList, ds.getPartitionCount()); } else { rDesc = new RecordDescriptor(new ISerializerDeserializer[opSchema.getSize()]); - scanner = new VXQueryIndexingOperatorDescriptor(jobSpec, (VXQueryIndexingDataSource) ds, rDesc, - this.hdfsConf, this.nodeControllerInfos); + scanner = new VXQueryIndexingOperatorDescriptor(jobSpec, (VXQueryIndexingDataSource) ds, rDesc); constraint = getClusterLocations(nodeList, ds.getPartitionCount()); } @@ -142,7 +143,7 @@ public static AlgebricksAbsolutePartitionConstraint getClusterLocations(String[] @Override public Pair getWriteFileRuntime(IDataSink sink, int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc) - throws AlgebricksException { + throws AlgebricksException { throw new UnsupportedOperationException(); } @@ -168,7 +169,7 @@ public Pair getIndexDeleteRu IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List primaryKeys, List secondaryKeys, List additionalNonKeyFields, ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec) - throws AlgebricksException { + throws AlgebricksException { throw new UnsupportedOperationException(); } @@ -234,7 +235,7 @@ public Pair getTokenizerRunt JobGenContext context, JobSpecification spec, boolean bulkload) throws 
AlgebricksException { throw new UnsupportedOperationException(); } - + @Override public Pair getInsertRuntime(IDataSource dataSource, IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List keys, @@ -243,7 +244,7 @@ public Pair getInsertRuntime JobSpecification jobSpec, boolean bulkload) throws AlgebricksException { throw new UnsupportedOperationException(); } - + @Override public Pair getUpsertRuntime(IDataSource dataSource, IOperatorSchema inputSchema, IVariableTypeEnvironment typeEnv, List keys, @@ -252,7 +253,7 @@ public Pair getUpsertRuntime JobSpecification jobSpec) throws AlgebricksException { throw new UnsupportedOperationException(); } - + @Override public Pair getIndexUpsertRuntime( IDataSourceIndex dataSourceIndex, IOperatorSchema propagatedSchema, @@ -263,11 +264,26 @@ public Pair getIndexUpsertRu JobSpecification spec) throws AlgebricksException { throw new UnsupportedOperationException(); } - + @Override public Map getConfig() { return new HashMap<>(); } + public List getIndexCollections() { + return collections; + + } + + public boolean hasIndex(ArrayList collections) { + boolean indexExists = false; + for (String collection : collections) { + indexExists = getIndexCollections().contains(collection); + if (!indexExists) { + break; + } + } + return indexExists; + } } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java index 470649669..2c2575254 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorUtil.java @@ -16,10 +16,14 @@ */ package org.apache.vxquery.runtime.functions.index; +import java.io.File; +import java.io.IOException; +import java.nio.file.Paths; +import java.text.SimpleDateFormat; +import 
java.util.concurrent.ConcurrentHashMap; + import org.apache.hyracks.data.std.api.IPointable; -import org.apache.hyracks.data.std.primitive.UTF8StringPointable; import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; -import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -31,38 +35,31 @@ import org.apache.vxquery.exceptions.ErrorCode; import org.apache.vxquery.exceptions.SystemException; import org.apache.vxquery.index.IndexDocumentBuilder; -import org.apache.vxquery.runtime.functions.index.updateIndex.MetaFileUtil; -import org.apache.vxquery.runtime.functions.index.updateIndex.XmlMetadata; +import org.apache.vxquery.runtime.functions.index.update.MetaFileUtil; +import org.apache.vxquery.runtime.functions.index.update.XmlMetadata; import org.apache.vxquery.runtime.functions.util.FunctionHelper; import org.apache.vxquery.xmlparser.IParser; import org.apache.vxquery.xmlparser.ITreeNodeIdProvider; import org.apache.vxquery.xmlparser.XMLParser; -import java.io.DataInputStream; -import java.io.File; -import java.io.IOException; -import java.nio.file.Paths; -import java.text.SimpleDateFormat; -import java.util.concurrent.ConcurrentHashMap; - public class IndexConstructorUtil { - boolean isMetaFilePresent = false; - MetaFileUtil metaFileUtil; - ConcurrentHashMap metadataMap = new ConcurrentHashMap<>(); + private final TaggedValuePointable nodep = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable(); + private final SequenceBuilder sb = new SequenceBuilder(); + private boolean isMetaFilePresent = false; + private MetaFileUtil metaFileUtil; + private ConcurrentHashMap metadataMap = new ConcurrentHashMap<>(); - public void evaluate(String collectioFolder, String indexFolder, IPointable result, UTF8StringPointable - stringp, ByteBufferInputStream bbis, DataInputStream di, SequenceBuilder sb, 
ArrayBackedValueStorage abvs, - ITreeNodeIdProvider nodeIdProvider, ArrayBackedValueStorage abvsFileNode, TaggedValuePointable nodep, - boolean isElementPath, String nodeId) throws SystemException { + public void evaluate(String collectioFolder, String indexFolder, IPointable result, ArrayBackedValueStorage abvs, + ITreeNodeIdProvider nodeIdProvider, ArrayBackedValueStorage abvsFileNode, boolean isElementPath, + String nodeId) throws IOException { - metaFileUtil = new MetaFileUtil(indexFolder); -// metaFileUtil = .create(indexFolder); - isMetaFilePresent = metaFileUtil.isMetaFilePresent(); - metaFileUtil.setCollection(collectioFolder); + metaFileUtil = new MetaFileUtil(indexFolder); + isMetaFilePresent = metaFileUtil.isMetaFilePresent(); + metaFileUtil.setCollection(collectioFolder); File collectionDirectory = new File(collectioFolder); if (!collectionDirectory.exists()) { - throw new RuntimeException("The collection directory (" + collectioFolder + ") does not exist."); + throw new IOException("The collection directory (" + collectioFolder + ") does not exist."); } try { @@ -80,8 +77,7 @@ public void evaluate(String collectioFolder, String indexFolder, IPointable resu IndexWriter writer = new IndexWriter(dir, iwc); //Add files to index - indexXmlFiles(collectionDirectory, writer, isElementPath, nodep, abvsFileNode, nodeIdProvider, sb, bbis, di, - nodeId); + indexXmlFiles(collectionDirectory, writer, isElementPath, abvsFileNode, nodeIdProvider, sb, nodeId); if (!isMetaFilePresent) { // Write metadata map to a file. @@ -101,14 +97,13 @@ public void evaluate(String collectioFolder, String indexFolder, IPointable resu } } - /*This function goes recursively one file at a time. First it turns the file into an ABVS document node, then + /* + * This function goes recursively one file at a time. First it turns the file into an ABVS document node, then * it indexes that document node. 
*/ public void indexXmlFiles(File collectionDirectory, IndexWriter writer, boolean isElementPath, - TaggedValuePointable nodep, ArrayBackedValueStorage abvsFileNode, ITreeNodeIdProvider nodeIdProvider, - SequenceBuilder sb, ByteBufferInputStream bbis, DataInputStream di, String nodeId) - throws SystemException, IOException { - + ArrayBackedValueStorage abvsFileNode, ITreeNodeIdProvider nodeIdProvider, SequenceBuilder sb, String nodeId) + throws IOException { SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy, HH:mm:ss"); for (File file : collectionDirectory.listFiles()) { @@ -116,8 +111,7 @@ public void indexXmlFiles(File collectionDirectory, IndexWriter writer, boolean if (readableXmlFile(file.getPath())) { abvsFileNode.reset(); - IndexDocumentBuilder ibuilder = getIndexBuilder(file, writer, nodep, abvsFileNode, nodeIdProvider, bbis, - di, nodeId); + IndexDocumentBuilder ibuilder = getIndexBuilder(file, writer, abvsFileNode, nodeIdProvider, nodeId); ibuilder.printStart(); if (!isMetaFilePresent) { @@ -131,22 +125,21 @@ public void indexXmlFiles(File collectionDirectory, IndexWriter writer, boolean } else if (file.isDirectory()) { // Consider all XML file in sub directories. 
- indexXmlFiles(file, writer, isElementPath, nodep, abvsFileNode, nodeIdProvider, sb, bbis, di, nodeId); + indexXmlFiles(file, writer, isElementPath, abvsFileNode, nodeIdProvider, sb, nodeId); } } } public boolean readableXmlFile(String path) { - return (path.toLowerCase().endsWith(".xml") || path.toLowerCase().endsWith(".xml.gz")); + return path.toLowerCase().endsWith(".xml") || path.toLowerCase().endsWith(".xml.gz"); } - public IndexDocumentBuilder getIndexBuilder(File file, IndexWriter writer, TaggedValuePointable nodep, - ArrayBackedValueStorage abvsFileNode, ITreeNodeIdProvider nodeIdProvider, ByteBufferInputStream bbis, - DataInputStream di, String nodeId) throws IOException { + public IndexDocumentBuilder getIndexBuilder(File file, IndexWriter writer, ArrayBackedValueStorage abvsFileNode, + ITreeNodeIdProvider nodeIdProvider, String nodeId) throws IOException { //Get the document node IParser parser = new XMLParser(false, nodeIdProvider, nodeId); - FunctionHelper.readInDocFromString(file.getPath(), bbis, di, abvsFileNode, parser); + FunctionHelper.readInDocFromString(file.getPath(), abvsFileNode, parser); nodep.set(abvsFileNode.getByteArray(), abvsFileNode.getStartOffset(), abvsFileNode.getLength()); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/ShowIndexScalarEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/ShowIndexesScalarEvaluatorFactory.java similarity index 89% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/ShowIndexScalarEvaluatorFactory.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/ShowIndexesScalarEvaluatorFactory.java index 6677bd90a..6004d7902 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/ShowIndexScalarEvaluatorFactory.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/ShowIndexesScalarEvaluatorFactory.java @@ -30,12 +30,12 @@ 
import org.apache.vxquery.exceptions.SystemException; import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluator; import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluatorFactory; -import org.apache.vxquery.runtime.functions.index.indexCentralizer.IndexCentralizerUtil; +import org.apache.vxquery.runtime.functions.index.centralizer.IndexCentralizerUtil; -public class ShowIndexScalarEvaluatorFactory extends AbstractTaggedValueArgumentScalarEvaluatorFactory { +public class ShowIndexesScalarEvaluatorFactory extends AbstractTaggedValueArgumentScalarEvaluatorFactory { private static final long serialVersionUID = 1L; - public ShowIndexScalarEvaluatorFactory(IScalarEvaluatorFactory[] args) { + public ShowIndexesScalarEvaluatorFactory(IScalarEvaluatorFactory[] args) { super(args); } @@ -52,7 +52,7 @@ protected void evaluate(TaggedValuePointable[] args, IPointable result) throws S abvs.reset(); sb.reset(abvs); IndexCentralizerUtil indexCentralizerUtil = new IndexCentralizerUtil( - ctx.getIOManager().getIODevices().get(0).getMount()); + ctx.getIoManager().getIODevices().get(0).getMount()); indexCentralizerUtil.readIndexDirectory(); indexCentralizerUtil.getAllCollections(sb); sb.finish(); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/VXQueryIndexReader.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/VXQueryIndexReader.java index 875084958..cf781ab4d 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/VXQueryIndexReader.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/VXQueryIndexReader.java @@ -16,11 +16,17 @@ */ package org.apache.vxquery.runtime.functions.index; +import java.io.IOException; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import 
org.apache.hyracks.api.comm.IFrameFieldAppender; +import org.apache.hyracks.api.comm.IFrameWriter; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; -import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; @@ -32,21 +38,21 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; +import org.apache.vxquery.context.DynamicContext; import org.apache.vxquery.exceptions.ErrorCode; import org.apache.vxquery.exceptions.SystemException; import org.apache.vxquery.index.IndexAttributes; +import org.apache.vxquery.runtime.functions.util.FunctionHelper; +import org.apache.vxquery.types.ElementType; +import org.apache.vxquery.types.NameTest; +import org.apache.vxquery.types.NodeType; +import org.apache.vxquery.types.SequenceType; import org.apache.vxquery.xmlparser.ITreeNodeIdProvider; import org.apache.vxquery.xmlparser.SAXContentHandler; import org.apache.vxquery.xmlparser.TreeNodeIdProvider; import org.xml.sax.Attributes; import org.xml.sax.SAXException; -import java.io.DataInputStream; -import java.io.IOException; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; - public class VXQueryIndexReader { private ArrayBackedValueStorage nodeAbvs = new ArrayBackedValueStorage(); @@ -55,10 +61,7 @@ public class VXQueryIndexReader { private int indexLength; private String elementPath; private String indexName; - - private ByteBufferInputStream bbis = new ByteBufferInputStream(); - private DataInputStream di = new DataInputStream(bbis); - + private List childSequenceTypes; private IndexReader reader; private IndexSearcher searcher; private QueryParser parser; @@ -68,14 +71,40 @@ public class 
VXQueryIndexReader { private Document doc; private List fields; private IHyracksTaskContext ctx; + private String[] childLocalName = null; + private IFrameFieldAppender appender; + private boolean firstElement; - public VXQueryIndexReader(IHyracksTaskContext context, String indexPath, String elementPath) { + public VXQueryIndexReader(IHyracksTaskContext context, String indexPath, List childSeq, + IFrameFieldAppender appender) { this.ctx = context; this.indexName = indexPath; - this.elementPath = elementPath; + this.appender = appender; + final DynamicContext dCtx = (DynamicContext) ctx.getJobletContext().getGlobalJobData(); + childSequenceTypes = new ArrayList<>(); + for (int typeCode : childSeq) { + childSequenceTypes.add(dCtx.getStaticContext().lookupSequenceType(typeCode)); + } + childLocalName = new String[childSequenceTypes.size()]; + int index = 0; + StringBuilder stb = new StringBuilder(); + stb.append("/"); + for (SequenceType sType : childSequenceTypes) { + NodeType nodeType = (NodeType) sType.getItemType(); + ElementType eType = (ElementType) nodeType; + NameTest nameTest = eType.getNameTest(); + childLocalName[index] = FunctionHelper.getStringFromBytes(nameTest.getLocalName()); + + stb.append(childLocalName[index]); + if (index != childSequenceTypes.size() - 1) { + stb.append("/"); + } + ++index; + } + elementPath = stb.toString(); } - public boolean step(IPointable result) throws AlgebricksException { + public boolean step(IPointable result, IFrameWriter writer, int tupleIndex) throws AlgebricksException { /*each step will create a tuple for a single xml file * This is done using the parse function * checkoverflow is used throughout. 
This is because memory might not be @@ -88,6 +117,8 @@ public boolean step(IPointable result) throws AlgebricksException { //TODO: now we get back the entire document doc = searcher.doc(hits[indexPlace].doc); fields = doc.getFields(); + handler.setupElementWriter(writer, tupleIndex); + this.firstElement = true; parse(nodeAbvs); } catch (IOException e) { throw new AlgebricksException(e); @@ -103,7 +134,7 @@ public void init() throws SystemException { int partition = ctx.getTaskAttemptId().getTaskId().getPartition(); ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider((short) partition); - handler = new SAXContentHandler(false, nodeIdProvider, true); + handler = new SAXContentHandler(false, nodeIdProvider, appender, childSequenceTypes); nodeAbvs.reset(); indexPlace = 0; @@ -125,7 +156,7 @@ public void init() throws SystemException { String queryString = elementPath.replaceAll("/", "."); queryString = "item:" + queryString + "*"; - int lastslash = elementPath.lastIndexOf("/"); + int lastslash = elementPath.lastIndexOf('/'); elementPath = elementPath.substring(0, lastslash) + ":" + elementPath.substring(lastslash + 1); elementPath = elementPath.replaceAll("/", ".") + ".element"; @@ -135,31 +166,25 @@ public void init() throws SystemException { //TODO: Right now it only returns 1000000 results results = searcher.search(query, 1000000); - } catch (Exception e) { - throw new SystemException(null); + throw new SystemException(null, e); } hits = results.scoreDocs; - System.out.println("found: " + results.totalHits); indexPlace = 0; indexLength = hits.length; - } public void parse(ArrayBackedValueStorage abvsFileNode) throws IOException { try { - handler.startDocument(); - for (int i = 0; i < fields.size(); i++) { String fieldValue = fields.get(i).stringValue(); if (fieldValue.equals(elementPath)) { + handler.startDocument(); + this.firstElement = true; buildElement(abvsFileNode, i); } } - - handler.endDocument(); - handler.writeDocument(abvsFileNode); } catch 
(Exception e) { throw new IOException(e); } @@ -167,6 +192,7 @@ public void parse(ArrayBackedValueStorage abvsFileNode) throws IOException { private int buildElement(ArrayBackedValueStorage abvsFileNode, int fieldNum) throws SAXException { int whereIFinish = fieldNum; + int firstFinish; IndexableField field = fields.get(fieldNum); String contents = field.stringValue(); String uri = ""; @@ -176,18 +202,37 @@ private int buildElement(ArrayBackedValueStorage abvsFileNode, int fieldNum) thr String type = contents.substring(lastDot + 1); String lastBit = contents.substring(firstColon + 1, lastDot); - if (type.equals("textnode")) { + if (this.firstElement) { + this.firstElement = false; + firstFinish = whereIFinish - this.childSequenceTypes.size() + 1; + String firstBit = contents.substring(1, firstColon); + List names = new ArrayList<>(); + List values = new ArrayList<>(); + List uris = new ArrayList<>(); + List localNames = new ArrayList<>(); + List types = new ArrayList<>(); + List qNames = new ArrayList<>(); + firstFinish = findAttributeChildren(firstFinish, names, values, uris, localNames, types, qNames); + Attributes atts = new IndexAttributes(names, values, uris, localNames, types, qNames); + + handler.startElement(uri, firstBit, firstBit, atts); + buildElement(abvsFileNode, firstFinish + 1); + handler.endElement(uri, firstBit, firstBit); + + } + + if ("textnode".equals(type)) { char[] charContents = lastBit.toCharArray(); handler.characters(charContents, 0, charContents.length); } - if (type.equals("element")) { - List names = new ArrayList(); - List values = new ArrayList(); - List uris = new ArrayList(); - List localNames = new ArrayList(); - List types = new ArrayList(); - List qNames = new ArrayList(); + if ("element".equals(type)) { + List names = new ArrayList<>(); + List values = new ArrayList<>(); + List uris = new ArrayList<>(); + List localNames = new ArrayList<>(); + List types = new ArrayList<>(); + List qNames = new ArrayList<>(); whereIFinish = 
findAttributeChildren(whereIFinish, names, values, uris, localNames, types, qNames); Attributes atts = new IndexAttributes(names, values, uris, localNames, types, qNames); @@ -264,7 +309,7 @@ boolean isChild(IndexableField child, IndexableField adult) { String adultPath = adultId.substring(0, lastDotAdult); adultPath = adultPath.replaceFirst(":", "."); - return (childPath.startsWith(adultPath + ":") || childPath.startsWith(adultPath + ".")); + return childPath.startsWith(adultPath + ":") || childPath.startsWith(adultPath + "."); } boolean isDirectChildAttribute(IndexableField child, IndexableField adult) { @@ -278,7 +323,7 @@ boolean isDirectChildAttribute(IndexableField child, IndexableField adult) { String childType = childSegments[childSegments.length - 1]; - return (childPath.startsWith(adultPath + ":") && childType.equals("attribute")); + return childPath.startsWith(adultPath + ":") && "attribute".equals(childType); } } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexCentralizerUtil.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexCentralizerUtil.java similarity index 78% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexCentralizerUtil.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexCentralizerUtil.java index 51510d59a..1d584337e 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexCentralizerUtil.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexCentralizerUtil.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.vxquery.runtime.functions.index.indexCentralizer; +package org.apache.vxquery.runtime.functions.index.centralizer; import java.io.DataOutput; import java.io.File; @@ -49,46 +49,54 @@ */ public class IndexCentralizerUtil { - private final String FILE_NAME = "VXQuery-Index-Directory.xml"; + private static final String FILE_NAME = "VXQuery-Index-Directory.xml"; private final List collections = new ArrayList<>(); - private final Logger LOGGER = Logger.getLogger("IndexCentralizerUtil"); - private File XML_FILE; - private String INDEX_LOCATION; - private static ConcurrentHashMap indexCollectionMap = new ConcurrentHashMap<>(); + private static final Logger LOGGER = Logger.getLogger("IndexCentralizerUtil"); + private File xmlFile; + private String indexPath; + public static ConcurrentHashMap indexCollectionMap = new ConcurrentHashMap<>(); + private static final StringValueBuilder svb = new StringValueBuilder(); + private final ArrayBackedValueStorage abvs = new ArrayBackedValueStorage(); + private final DataOutput output = abvs.getDataOutput(); public IndexCentralizerUtil(File index) { - this.INDEX_LOCATION = index.getPath(); + indexPath = index.getPath(); if (!index.exists()) { try { FileUtils.forceMkdir(index); } catch (IOException e) { - LOGGER.log(Level.SEVERE, "Could not create the index directory for path: " + INDEX_LOCATION + " " + e); + LOGGER.log(Level.SEVERE, "Could not create the index directory for path: " + indexPath + " " + e); } } - XML_FILE = new File(index.getPath() + "/" + FILE_NAME); + xmlFile = new File(index.getPath() + "/" + FILE_NAME); } /** * Get the index directory containing index of the given collection * - * @param collection : Collection folder + * @param collection + * : Collection folder * @return Index folder. 
*/ public String getIndexForCollection(String collection) { - return indexCollectionMap.get(collection).getIndex(); + if (indexCollectionMap.size() > 0 && indexCollectionMap.containsKey(collection)) { + return indexCollectionMap.get(collection).getIndex(); + } + return null; } /** * Put the index location corresponding to given collection. * Index location is created by using the last 100 characters of collection. * - * @param collection : Collection directory + * @param collection + * : Collection directory * @return index */ public String putIndexForCollection(String collection) { int length = collection.replaceAll("/", "").length(); String index = collection.replaceAll("/", ""); - index = INDEX_LOCATION + "/" + (length > 100 ? index.substring(length - 100) : index); + index = indexPath + "/" + (length > 100 ? index.substring(length - 100) : index); IndexLocator il = new IndexLocator(); il.setCollection(collection); il.setIndex(index); @@ -102,7 +110,8 @@ public String putIndexForCollection(String collection) { /** * Remove the entry for given collection directory. * - * @param collection : Collection directory + * @param collection + * : Collection directory */ public void deleteEntryForCollection(String collection) { indexCollectionMap.remove(collection); @@ -110,14 +119,15 @@ public void deleteEntryForCollection(String collection) { /** * Prints all collections which have an index created. 
- * @param sb : The output is stored in a sequence - * @throws IOException : If writing the dataOutput generates {@link IOException} + * + * @param sb + * : The output is stored in a sequence + * @throws IOException + * : If writing the dataOutput generates {@link IOException} */ public void getAllCollections(SequenceBuilder sb) throws IOException { for (String s : collections) { - StringValueBuilder svb = new StringValueBuilder(); - ArrayBackedValueStorage abvs = new ArrayBackedValueStorage(); - DataOutput output = abvs.getDataOutput(); + abvs.reset(); output.write(ValueTag.XS_STRING_TAG); svb.write(s, output); sb.addItem(abvs); @@ -128,11 +138,11 @@ public void getAllCollections(SequenceBuilder sb) throws IOException { * Read the collection, index directory file and populate the HashMap. */ public void readIndexDirectory() { - if (this.XML_FILE.exists()) { + if (xmlFile.exists()) { try { JAXBContext jaxbContext = JAXBContext.newInstance(IndexDirectory.class); Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller(); - IndexDirectory indexDirectory = (IndexDirectory) jaxbUnmarshaller.unmarshal(this.XML_FILE); + IndexDirectory indexDirectory = (IndexDirectory) jaxbUnmarshaller.unmarshal(xmlFile); for (IndexLocator il : indexDirectory.getDirectory()) { indexCollectionMap.put(il.getCollection(), il); @@ -153,7 +163,7 @@ public void writeIndexDirectory() { List indexLocators = new ArrayList<>(indexCollectionMap.values()); id.setDirectory(indexLocators); try { - FileOutputStream fileOutputStream = new FileOutputStream(this.XML_FILE); + FileOutputStream fileOutputStream = new FileOutputStream(this.xmlFile); JAXBContext context = JAXBContext.newInstance(IndexDirectory.class); Marshaller jaxbMarshaller = context.createMarshaller(); jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexDirectory.java 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexDirectory.java similarity index 89% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexDirectory.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexDirectory.java index 54d9ad98e..d1189269c 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexDirectory.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexDirectory.java @@ -14,19 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.vxquery.runtime.functions.index.indexCentralizer; +package org.apache.vxquery.runtime.functions.index.centralizer; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlRootElement; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; @XmlRootElement(name = "indexes") @XmlAccessorType(XmlAccessType.FIELD) -public class IndexDirectory implements Serializable{ +public class IndexDirectory implements Serializable { + private static final long serialVersionUID = 1L; @XmlElement(name = "index", type = IndexLocator.class) private List directory = new ArrayList<>(); @@ -35,7 +37,6 @@ public List getDirectory() { return directory; } - public void setDirectory(List directory) { this.directory = directory; } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexLocator.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexLocator.java similarity index 89% rename from 
vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexLocator.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexLocator.java index 1a33c8b60..49dbecd88 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/indexCentralizer/IndexLocator.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/centralizer/IndexLocator.java @@ -14,17 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.vxquery.runtime.functions.index.indexCentralizer; +package org.apache.vxquery.runtime.functions.index.centralizer; + +import java.io.Serializable; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAttribute; import javax.xml.bind.annotation.XmlRootElement; -import java.io.Serializable; @XmlAccessorType(XmlAccessType.FIELD) @XmlRootElement(name = "Entry") -public class IndexLocator implements Serializable{ +public class IndexLocator implements Serializable { + private static final long serialVersionUID = 1L; @XmlAttribute private String collection; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/Constants.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/Constants.java similarity index 80% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/Constants.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/Constants.java index 2a4574768..0346a62a4 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/Constants.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/Constants.java @@ -14,12 +14,15 @@ * See the License for the specific language governing 
permissions and * limitations under the License. */ -package org.apache.vxquery.runtime.functions.index.updateIndex; +package org.apache.vxquery.runtime.functions.index.update; /** * Constants used in updating index */ public class Constants { - public static String FIELD_PATH = "path"; - public static String META_FILE_NAME = "vxquery_index.xml"; + public static final String FIELD_PATH = "path"; + public static final String META_FILE_NAME = "vxquery_index.xml"; + + private Constants() { + } } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/IndexUpdater.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/IndexUpdater.java similarity index 87% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/IndexUpdater.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/IndexUpdater.java index d3b9fdf52..65a8325ec 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/IndexUpdater.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/IndexUpdater.java @@ -14,12 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.vxquery.runtime.functions.index.updateIndex; +package org.apache.vxquery.runtime.functions.index.update; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.text.SimpleDateFormat; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import org.apache.hyracks.data.std.api.IPointable; -import org.apache.hyracks.data.std.primitive.UTF8StringPointable; import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; -import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.lucene.index.IndexWriter; @@ -27,7 +34,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; -import org.apache.vxquery.datamodel.accessors.TaggedValuePointable; import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder; import org.apache.vxquery.exceptions.ErrorCode; import org.apache.vxquery.exceptions.SystemException; @@ -36,16 +42,6 @@ import org.apache.vxquery.runtime.functions.index.IndexConstructorUtil; import org.apache.vxquery.xmlparser.ITreeNodeIdProvider; -import java.io.DataInputStream; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.text.SimpleDateFormat; -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - /** * Update the index if the source files are changed. 
*/ @@ -53,48 +49,37 @@ public class IndexUpdater { private MetaFileUtil metaFileUtil; private ConcurrentHashMap metadataMap; private IPointable result; - private ByteBufferInputStream bbis; - private DataInputStream di; - private SequenceBuilder sb; + private final SequenceBuilder sb = new SequenceBuilder(); private ArrayBackedValueStorage abvs; private ITreeNodeIdProvider nodeIdProvider; private ArrayBackedValueStorage abvsFileNode; - private TaggedValuePointable nodep; private String nodeId; private IndexWriter indexWriter; private Set pathsFromFileList; private String collectionFolder; private String indexFolder; - private Logger LOGGER = Logger.getLogger("Index Updater"); + private final Logger LOGGER = Logger.getLogger("Index Updater"); private SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss"); private IndexConstructorUtil indexConstructorUtil = new IndexConstructorUtil(); - public IndexUpdater(String indexFolder, IPointable result, UTF8StringPointable stringp, ByteBufferInputStream bbis, - DataInputStream di, SequenceBuilder sb, ArrayBackedValueStorage abvs, ITreeNodeIdProvider nodeIdProvider, - ArrayBackedValueStorage abvsFileNode, TaggedValuePointable nodep, String nodeId) { + public IndexUpdater(String indexFolder, IPointable result, ArrayBackedValueStorage abvs, ITreeNodeIdProvider nodeIdProvider, + ArrayBackedValueStorage abvsFileNode, String nodeId) { this.indexFolder = indexFolder; this.result = result; - this.bbis = bbis; - this.di = di; - this.sb = sb; this.abvs = abvs; this.nodeIdProvider = nodeIdProvider; this.abvsFileNode = abvsFileNode; - this.nodep = nodep; this.nodeId = nodeId; this.pathsFromFileList = new HashSet<>(); } /** * Perform the initial configuration for index update/ delete processes. 
- * - * @throws SystemException - * : If getting the index folder generates {@link SystemException} + * * @throws IOException * : If getting the index folder generates {@link IOException} */ - public void setup() throws SystemException, IOException { - + public void setup() throws IOException { // Read the metadata file and load the metadata map into memory. metaFileUtil = new MetaFileUtil(indexFolder); metaFileUtil.readMetadataFile(); @@ -114,14 +99,14 @@ public void setup() throws SystemException, IOException { /** * Wrapper for update index function. - * + * * @throws IOException * : If the directory doesn't exist */ public void updateIndex() throws IOException { File collectionDirectory = new File(collectionFolder); if (!collectionDirectory.exists()) { - throw new RuntimeException("The collection directory (" + collectionFolder + ") does not exist."); + throw new IOException("The collection directory (" + collectionFolder + ") does not exist."); } //Execute update index process @@ -134,7 +119,7 @@ public void updateIndex() throws IOException { /** * Close opened IndexWriter and terminate the index update/ delete process. - * + * * @throws IOException * : If exiting the index folder generates {@link IOException} */ @@ -149,7 +134,7 @@ public void exit() throws IOException { /** * Functional wrapper to update Metadata file. - * + * * @throws IOException * : If updating metadata folder generates {@link IOException} */ @@ -190,8 +175,8 @@ private void updateIndex(File collection) throws IOException { //Update index corresponding to the xml file. 
indexWriter.deleteDocuments(new Term(Constants.FIELD_PATH, file.getCanonicalPath())); - indexDocumentBuilder = indexConstructorUtil.getIndexBuilder(file, indexWriter, nodep, - abvsFileNode, nodeIdProvider, bbis, di, nodeId); + indexDocumentBuilder = indexConstructorUtil.getIndexBuilder(file, indexWriter, abvsFileNode, + nodeIdProvider, nodeId); indexDocumentBuilder.printStart(); if (LOGGER.isDebugEnabled()) { @@ -207,8 +192,8 @@ private void updateIndex(File collection) throws IOException { // In this case, the xml file has not added to the index. (It is a newly added file) // Therefore generate a new index for this file and add it to the existing index. - indexDocumentBuilder = indexConstructorUtil.getIndexBuilder(file, indexWriter, nodep, abvsFileNode, - nodeIdProvider, bbis, di, nodeId); + indexDocumentBuilder = indexConstructorUtil.getIndexBuilder(file, indexWriter, abvsFileNode, nodeIdProvider, + nodeId); indexDocumentBuilder.printStart(); if (LOGGER.isDebugEnabled()) { @@ -235,8 +220,8 @@ private void updateIndex(File collection) throws IOException { * @throws IOException * : If getting the file info generates {@link IOException} */ - private XmlMetadata updateEntry(File file, XmlMetadata metadata) throws IOException { - + private XmlMetadata updateEntry(File file, XmlMetadata metadataArg) throws IOException { + XmlMetadata metadata = metadataArg; if (metadata == null) { metadata = new XmlMetadata(); } @@ -286,7 +271,7 @@ private void deleteIndexOfDeletedFiles(Set pathsFromMap, Set pat * When deleting indexes, if any error occurred, the process will be rolled back and all the indexes will be * restored. * Otherwise the changes will be committed. 
- * + * * @throws SystemException * : An attempt to divide by zero */ @@ -310,7 +295,7 @@ public void deleteAllIndexes() throws SystemException { sb.finish(); result.set(abvs); } catch (IOException e1) { - throw new SystemException(ErrorCode.FOAR0001); + throw new SystemException(ErrorCode.FOAR0001, e1); } } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/MetaFileUtil.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/MetaFileUtil.java similarity index 85% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/MetaFileUtil.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/MetaFileUtil.java index 0dfb54aec..5f4135567 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/MetaFileUtil.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/MetaFileUtil.java @@ -14,16 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.vxquery.runtime.functions.index.updateIndex; +package org.apache.vxquery.runtime.functions.index.update; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - -import javax.xml.bind.DatatypeConverter; -import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBException; -import javax.xml.bind.Marshaller; -import javax.xml.bind.Unmarshaller; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; @@ -37,13 +29,22 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import javax.xml.bind.DatatypeConverter; +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBException; +import javax.xml.bind.Marshaller; +import javax.xml.bind.Unmarshaller; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + /** * Utility class for writing, reading metadata file and generating checksum. */ public class MetaFileUtil { private File metaFile; - private Logger LOGGER = Logger.getLogger("MetadataFileUtil"); + private static final Logger LOGGER = Logger.getLogger("MetadataFileUtil"); private String index; private String collection; private ConcurrentHashMap indexMap = new ConcurrentHashMap<>(); @@ -65,8 +66,11 @@ public boolean isMetaFilePresent() { * Update the content of the metadata map. * If the current collection data is present, replace it. * Otherwise insert new. - * @param metadataMap : Set of XmlMetaData objects. - * @param index : The path to index location. + * + * @param metadataMap + * : Set of XmlMetaData objects. + * @param index + * : The path to index location. */ public void updateMetadataMap(ConcurrentHashMap metadataMap, String index) { this.indexMap = metadataMap; @@ -109,22 +113,22 @@ public void readMetadataFile() { * Write the content of the ConcurrentHashMap to the xml metadata file. 
*/ public void writeMetadataToFile() { - XmlMetadataCollection collection = new XmlMetadataCollection(); + XmlMetadataCollection xmlMetadataCollection = new XmlMetadataCollection(); List metadataList = new ArrayList<>(); for (Map.Entry entry : this.indexMap.entrySet()) { metadataList.add(entry.getValue()); } - collection.setMetadataList(metadataList); - collection.setCollection(this.collection); - collection.setIndexLocation(this.index); - try{ + xmlMetadataCollection.setMetadataList(metadataList); + xmlMetadataCollection.setCollection(collection); + xmlMetadataCollection.setIndexLocation(this.index); + try { FileOutputStream fileOutputStream = new FileOutputStream(this.metaFile); JAXBContext jaxbContext = JAXBContext.newInstance(VXQueryIndex.class); Marshaller jaxbMarshaller = jaxbContext.createMarshaller(); jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true); - jaxbMarshaller.marshal(collection, fileOutputStream); + jaxbMarshaller.marshal(xmlMetadataCollection, fileOutputStream); if (LOGGER.isDebugEnabled()) { LOGGER.log(Level.DEBUG, "Writing metadata file completed successfully!"); @@ -135,18 +139,18 @@ public void writeMetadataToFile() { } } - } - /** * Generate MD5 checksum string for a given file. * - * @param file : File which the checksum should be generated. + * @param file + * : File which the checksum should be generated. 
* @return : Checksum String - * @throws IOException : The file is not available + * @throws IOException + * : The file is not available */ - public String generateMD5(File file) throws IOException { + public String generateMD5(File file) throws IOException { try { MessageDigest md = MessageDigest.getInstance("MD5"); md.update(Files.readAllBytes(file.toPath())); @@ -168,12 +172,12 @@ public String generateMD5(File file) throws IOException { public boolean deleteMetaDataFile() { try { Files.delete(Paths.get(metaFile.getCanonicalPath())); - if (LOGGER.isDebugEnabled()){ + if (LOGGER.isDebugEnabled()) { LOGGER.log(Level.DEBUG, "Metadata file deleted!"); } return true; } catch (IOException e) { - if (LOGGER.isTraceEnabled()){ + if (LOGGER.isTraceEnabled()) { LOGGER.log(Level.ERROR, "Metadata file could not be deleted!"); } return false; @@ -182,6 +186,7 @@ public boolean deleteMetaDataFile() { /** * Get the collection for a given index location. + * * @return collection folder for a given index. */ public String getCollection() { @@ -190,7 +195,9 @@ public String getCollection() { /** * Set the entry for given index and collection. 
- * @param collection : path to corresponding collection + * + * @param collection + * : path to corresponding collection */ public void setCollection(String collection) { this.collection = collection; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/VXQueryIndex.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/VXQueryIndex.java similarity index 95% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/VXQueryIndex.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/VXQueryIndex.java index fa92b2f36..fc967633a 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/VXQueryIndex.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/VXQueryIndex.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.vxquery.runtime.functions.index.updateIndex; +package org.apache.vxquery.runtime.functions.index.update; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadata.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/XmlMetadata.java similarity index 94% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadata.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/XmlMetadata.java index b6da6d9bd..063120cc6 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadata.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/XmlMetadata.java @@ -14,12 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.vxquery.runtime.functions.index.updateIndex; +package org.apache.vxquery.runtime.functions.index.update; + +import java.io.Serializable; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlRootElement; -import java.io.Serializable; /** * Class to store metadata related to an XML file. 
@@ -32,6 +33,7 @@ @XmlRootElement(name = "file") @XmlAccessorType(XmlAccessType.FIELD) public class XmlMetadata implements Serializable { + private static final long serialVersionUID = 1L; private String path; private String md5; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadataCollection.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/XmlMetadataCollection.java similarity index 97% rename from vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadataCollection.java rename to vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/XmlMetadataCollection.java index 1f5c3e99e..a1ca77646 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/updateIndex/XmlMetadataCollection.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/update/XmlMetadataCollection.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.vxquery.runtime.functions.index.updateIndex; +package org.apache.vxquery.runtime.functions.index.update; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/JnDocScalarEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/JnDocScalarEvaluatorFactory.java index 665c812b8..85ef4ca06 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/JnDocScalarEvaluatorFactory.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/json/JnDocScalarEvaluatorFactory.java @@ -64,7 +64,7 @@ protected void evaluate(TaggedValuePointable[] args, IPointable result) throws S tvp.getValue(stringp); try { IParser parser = new JSONParser(); - FunctionHelper.readInDocFromPointable(stringp, bbis, di, abvs, parser); + FunctionHelper.readInDocFromPointable(stringp, abvs, parser); } catch (IOException e) { throw new SystemException(ErrorCode.FODC0002, e); } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java index db908f638..6d63d5fe2 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java @@ -57,7 +57,7 @@ protected IScalarEvaluator createEvaluator(IHyracksTaskContext ctx, IScalarEvalu final DataInputStream di = new DataInputStream(bbis); final int partition = ctx.getTaskAttemptId().getTaskId().getPartition(); final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider((short) partition); - final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId(); + final String nodeId = 
ctx.getJobletContext().getServiceContext().getNodeId(); return new AbstractTaggedValueArgumentScalarEvaluator(args) { @Override @@ -78,7 +78,7 @@ protected void evaluate(TaggedValuePointable[] args, IPointable result) throws S tvp.getValue(stringp); try { IParser parser = new XMLParser(false, nodeIdProvider, nodeId); - FunctionHelper.readInDocFromPointable(stringp, bbis, di, abvs, parser); + FunctionHelper.readInDocFromPointable(stringp, abvs, parser); XDMConstants.setTrue(result); } catch (Exception e) { XDMConstants.setFalse(result); diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocScalarEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocScalarEvaluatorFactory.java index 5f08a8eef..e3157afdb 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocScalarEvaluatorFactory.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocScalarEvaluatorFactory.java @@ -57,7 +57,7 @@ protected IScalarEvaluator createEvaluator(IHyracksTaskContext ctx, IScalarEvalu final DataInputStream di = new DataInputStream(bbis); final int partition = ctx.getTaskAttemptId().getTaskId().getPartition(); final ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider((short) partition); - final String nodeId = ctx.getJobletContext().getApplicationContext().getNodeId(); + final String nodeId = ctx.getJobletContext().getServiceContext().getNodeId(); return new AbstractTaggedValueArgumentScalarEvaluator(args) { @Override @@ -79,7 +79,7 @@ protected void evaluate(TaggedValuePointable[] args, IPointable result) throws S try { // Only one document should be parsed so its ok to have a unique parser. 
IParser parser = new XMLParser(false, nodeIdProvider, nodeId); - FunctionHelper.readInDocFromPointable(stringp, bbis, di, abvs, parser); + FunctionHelper.readInDocFromPointable(stringp, abvs, parser); } catch (Exception e) { throw new SystemException(ErrorCode.SYSE0001, e); } diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java index 9026de3c7..03b9cf332 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java @@ -18,7 +18,6 @@ import java.io.IOException; -import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.api.IPointable; @@ -109,7 +108,7 @@ public boolean step(IPointable result) throws HyracksDataException { * @param result * result * @return found result - * @throws AlgebricksException + * @throws HyracksDataException * Could not save result. 
*/ protected boolean stepNodeTree(TaggedValuePointable tvpInput, int level, IPointable result) diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/DescendantOrSelfPathStepUnnesting.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/DescendantOrSelfPathStepUnnesting.java index 305e26de0..facbee38f 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/DescendantOrSelfPathStepUnnesting.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/DescendantOrSelfPathStepUnnesting.java @@ -20,7 +20,6 @@ import java.util.ArrayList; import java.util.List; -import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.api.IPointable; @@ -141,7 +140,7 @@ private boolean processNodeTree(TaggedValuePointable rootTVP, IPointable result) * @param result * result * @return found result - * @throws AlgebricksException + * @throws HyracksDataException * Could not save result. 
*/ protected boolean stepNodeTree(TaggedValuePointable tvpInput, int level, IPointable result) diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java index 7c3197a1e..1d66c4eb8 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java @@ -16,7 +16,6 @@ */ package org.apache.vxquery.runtime.functions.util; -import java.io.DataInputStream; import java.io.DataOutput; import java.io.File; import java.io.FileInputStream; @@ -37,7 +36,7 @@ import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; import org.apache.hyracks.data.std.util.GrowableArray; import org.apache.hyracks.data.std.util.UTF8StringBuilder; -import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream; +import org.apache.hyracks.util.string.UTF8StringUtil; import org.apache.vxquery.context.DynamicContext; import org.apache.vxquery.datamodel.accessors.TaggedValuePointable; import org.apache.vxquery.datamodel.accessors.TypedPointables; @@ -485,6 +484,15 @@ public static boolean arraysEqual(byte[] bytes1, int offset1, int length1, byte[ return true; } + public static String getStringFromBytes(byte[] bytes) { + if (bytes == null) { + return null; + } + StringBuilder sb = new StringBuilder(); + UTF8StringUtil.toString(sb, bytes, 0); + return sb.toString(); + } + public static boolean compareTaggedValues(AbstractValueComparisonOperation aOp, TaggedValuePointable tvp1, TaggedValuePointable tvp2, DynamicContext dCtx, TypedPointables tp1, TypedPointables tp2) throws SystemException { @@ -1215,13 +1223,12 @@ public static void printUTF8String(UTF8StringPointable stringp) { System.err.println(" printUTF8String END"); } - public static void readInDocFromPointable(UTF8StringPointable stringp, ByteBufferInputStream bbis, - 
DataInputStream di, ArrayBackedValueStorage abvs, IParser parser) throws IOException { - readInDocFromString(stringp.toString(), bbis, di, abvs, parser); + public static void readInDocFromPointable(UTF8StringPointable stringp, ArrayBackedValueStorage abvs, + IParser parser) throws IOException { + readInDocFromString(stringp.toString(), abvs, parser); } - public static void readInDocFromString(String fName, ByteBufferInputStream bbis, DataInputStream di, - ArrayBackedValueStorage abvs, IParser parser) throws IOException { + public static void readInDocFromString(String fName, ArrayBackedValueStorage abvs, IParser parser) throws IOException { Reader input; if (!fName.contains("hdfs:/")) { File file = new File(fName); @@ -1359,15 +1366,16 @@ public static void writeDateAsString(IDate date, UTF8StringBuilder sb) throws IO } /** - * Writes a number to the DataOutput with zeros as place holders if the number is too small to fill the padding. + * Writes a number to the string builder with zeros as place holders if the number is too small to fill the padding. * * @param valueArg * value * @param paddingArg * padding - * @param dOut - * data output + * @param sb + * string builder * @throws IOException + * Could not save result. 
*/ public static void writeNumberWithPadding(long valueArg, int paddingArg, UTF8StringBuilder sb) throws IOException { long value = valueArg; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java index 84c8ddf25..9e21f5399 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java @@ -129,8 +129,6 @@ public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider, IFrameFieldAppender appender, List childSequenceTypes) { this(attachTypes, nodeIdProvider, false); - - // Frame writing variables this.appender = appender; setChildPathSteps(childSequenceTypes); } @@ -297,10 +295,11 @@ public void startDocument() throws SAXException { /** * The filter settings here are similar to one in the class linked below. - * + * + * @throws SAXException * @see org.apache.vxquery.runtime.functions.step.NodeTestFilter.java */ - private boolean startElementChildPathStep(String uri, String localName) { + private boolean startElementChildPathStep(String uri, String localName) throws SAXException { if (subElement != null && depth <= subElement.length) { // Check path step if it exists. 
subElement[depth - 1] = true; diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java index 0c252b4f3..d3d02aee5 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java @@ -222,7 +222,7 @@ private void setNodeList() { } } - public void compile(String name, Reader query, CompilerControlBlock ccb, int optimizationLevel) + public void compile(String name, Reader query, CompilerControlBlock ccb, int optimizationLevel, List collections) throws AlgebricksException, SystemException { moduleNode = XMLQueryParser.parse(name, query); listener.notifyParseResult(moduleNode); @@ -230,7 +230,7 @@ public void compile(String name, Reader query, CompilerControlBlock ccb, int opt pprinter = new LogicalOperatorPrettyPrintVisitor(new AlgebricksAppendable(), new VXQueryLogicalExpressionPrettyPrintVisitor(module.getModuleContext())); VXQueryMetadataProvider mdProvider = new VXQueryMetadataProvider(nodeList, ccb.getSourceFileMap(), - module.getModuleContext(), this.hdfsConf, nodeControllerInfos); + module.getModuleContext(), this.hdfsConf, nodeControllerInfos, collections); compiler = cFactory.createCompiler(module.getBody(), mdProvider, 0); listener.notifyTranslationResult(module); XMLQueryTypeChecker.typeCheckModule(module); diff --git a/vxquery-core/src/test/java/org/apache/vxquery/indexing/MetaFileUtilTest.java b/vxquery-core/src/test/java/org/apache/vxquery/indexing/MetaFileUtilTest.java index 60f39f879..45d553fa6 100644 --- a/vxquery-core/src/test/java/org/apache/vxquery/indexing/MetaFileUtilTest.java +++ b/vxquery-core/src/test/java/org/apache/vxquery/indexing/MetaFileUtilTest.java @@ -23,8 +23,8 @@ import javax.xml.bind.JAXBException; import org.apache.commons.io.FileUtils; -import 
org.apache.vxquery.runtime.functions.index.updateIndex.MetaFileUtil; -import org.apache.vxquery.runtime.functions.index.updateIndex.XmlMetadata; +import org.apache.vxquery.runtime.functions.index.update.MetaFileUtil; +import org.apache.vxquery.runtime.functions.index.update.XmlMetadata; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; diff --git a/vxquery-core/src/test/java/org/apache/vxquery/indexing/TestConstants.java b/vxquery-core/src/test/java/org/apache/vxquery/indexing/TestConstants.java index b79107e2f..1b3e0b771 100644 --- a/vxquery-core/src/test/java/org/apache/vxquery/indexing/TestConstants.java +++ b/vxquery-core/src/test/java/org/apache/vxquery/indexing/TestConstants.java @@ -14,8 +14,6 @@ */ package org.apache.vxquery.indexing; -import org.apache.vxquery.runtime.functions.index.updateIndex.XmlMetadata; - import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -27,6 +25,8 @@ import java.nio.file.Paths; import java.util.concurrent.ConcurrentHashMap; +import org.apache.vxquery.runtime.functions.index.update.XmlMetadata; + /** * TestConstants and methods which will be used in indexing test cases. 
*/ diff --git a/vxquery-core/src/test/java/org/apache/vxquery/xmlquery/query/SimpleXQueryTest.java b/vxquery-core/src/test/java/org/apache/vxquery/xmlquery/query/SimpleXQueryTest.java index 7646f9734..1ee35a474 100644 --- a/vxquery-core/src/test/java/org/apache/vxquery/xmlquery/query/SimpleXQueryTest.java +++ b/vxquery-core/src/test/java/org/apache/vxquery/xmlquery/query/SimpleXQueryTest.java @@ -142,6 +142,6 @@ private static void runTestInternal(String testName, String query) throws Except XMLQueryCompiler compiler = new XMLQueryCompiler(null, nodeControllerInfos, 65536); CompilerControlBlock ccb = new CompilerControlBlock(new StaticContextImpl(RootStaticContextImpl.INSTANCE), new ResultSetId(System.nanoTime()), null); - compiler.compile(testName, new StringReader(query), ccb, Integer.MAX_VALUE); + compiler.compile(testName, new StringReader(query), ccb, Integer.MAX_VALUE, null); } } diff --git a/vxquery-server/pom.xml b/vxquery-server/pom.xml index e572c3605..5cc9b5d84 100644 --- a/vxquery-server/pom.xml +++ b/vxquery-server/pom.xml @@ -143,6 +143,11 @@ 0.7-SNAPSHOT + + org.apache.hyracks + hyracks-api + + org.apache.hyracks hyracks-control-cc @@ -152,6 +157,11 @@ org.apache.hyracks hyracks-control-nc + + + args4j + args4j + diff --git a/vxquery-xtest/pom.xml b/vxquery-xtest/pom.xml index a00bec201..b8b7a8603 100644 --- a/vxquery-xtest/pom.xml +++ b/vxquery-xtest/pom.xml @@ -143,6 +143,26 @@ 0.7-SNAPSHOT + + args4j + args4j + + + + xml-apis + xml-apis + + + + stax + stax-api + + + + javax.servlet + servlet-api + + org.apache.hyracks hyracks-api @@ -153,6 +173,11 @@ hyracks-client + + org.apache.hyracks + hyracks-control-common + + org.apache.hyracks hyracks-control-cc @@ -168,6 +193,11 @@ hyracks-dataflow-std + + org.apache.hyracks + hyracks-dataflow-common + + org.apache.hyracks hyracks-hdfs-2.x @@ -178,11 +208,26 @@ commons-io + + org.apache.commons + commons-lang3 + + + + org.apache.hadoop + hadoop-common + + org.apache.hadoop hadoop-hdfs + + 
org.apache.hadoop + hadoop-mapreduce-client-core + + org.mortbay.jetty jetty diff --git a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestClusterUtil.java b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestClusterUtil.java index 0e5b48105..148dfa540 100644 --- a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestClusterUtil.java +++ b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestClusterUtil.java @@ -26,6 +26,7 @@ import java.io.File; import java.io.IOException; +import java.net.Inet4Address; import java.net.InetAddress; import java.net.UnknownHostException; @@ -45,26 +46,25 @@ private TestClusterUtil() { } public static CCConfig createCCConfig() throws UnknownHostException { - String publicAddress = InetAddress.getLocalHost().getHostAddress(); + String publicAddress = Inet4Address.getLoopbackAddress().getHostAddress(); CCConfig ccConfig = new CCConfig(); - ccConfig.clientNetIpAddress = publicAddress; - ccConfig.clientNetPort = CLIENT_NET_PORT; - ccConfig.clusterNetIpAddress = publicAddress; - ccConfig.clusterNetPort = CLUSTER_NET_PORT; - ccConfig.profileDumpPeriod = PROFILE_DUMP_PERIOD; + ccConfig.setClientListenAddress(publicAddress); + ccConfig.setClientListenPort(CLIENT_NET_PORT); + ccConfig.setClusterListenAddress(publicAddress); + ccConfig.setClusterListenPort(CLUSTER_NET_PORT); + ccConfig.setProfileDumpPeriod(PROFILE_DUMP_PERIOD); return ccConfig; } public static NCConfig createNCConfig() throws UnknownHostException { - String publicAddress = InetAddress.getLocalHost().getHostAddress(); - NCConfig ncConfig1 = new NCConfig(); - ncConfig1.ccHost = CC_HOST; - ncConfig1.ccPort = CLUSTER_NET_PORT; - ncConfig1.clusterNetIPAddress = publicAddress; - ncConfig1.dataIPAddress = publicAddress; - ncConfig1.resultIPAddress = publicAddress; - ncConfig1.nodeId = NODE_ID; - ncConfig1.ioDevices = IO_DEVICES; + String publicAddress = Inet4Address.getLoopbackAddress().getHostAddress(); + NCConfig ncConfig1 = new NCConfig(NODE_ID); + 
ncConfig1.setClusterAddress(CC_HOST); + ncConfig1.setClusterPort(CLUSTER_NET_PORT); + ncConfig1.setClusterListenAddress(publicAddress); + ncConfig1.setDataPublicAddress(publicAddress); + ncConfig1.setResultPublicAddress(publicAddress); + ncConfig1.setIODevices(new String[] { IO_DEVICES }); return ncConfig1; } @@ -75,11 +75,11 @@ public static ClusterControllerService startCC(XTestOptions opts) throws IOExcep File ccRoot = File.createTempFile(TestRunner.class.getName(), ".data", outDir); ccRoot.delete(); ccRoot.mkdir(); - ccConfig.ccRoot = ccRoot.getAbsolutePath(); + ccConfig.setRootDir(ccRoot.getAbsolutePath()); try { ClusterControllerService cc = new ClusterControllerService(ccConfig); cc.start(); - hcc = new HyracksConnection(ccConfig.clientNetIpAddress, ccConfig.clientNetPort); + hcc = new HyracksConnection(ccConfig.getClientListenAddress(), ccConfig.getClientListenPort()); hds = new HyracksDataset(hcc, opts.frameSize, opts.threads); return cc; } catch (Exception e) { diff --git a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunner.java b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunner.java index e4ba6eb92..a2cf709a9 100644 --- a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunner.java +++ b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/TestRunner.java @@ -14,16 +14,24 @@ */ package org.apache.vxquery.xtest; +import java.io.File; import java.io.FileInputStream; +import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; import java.util.EnumSet; +import java.util.List; import java.util.Map; +import java.util.TimeZone; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.hyracks.api.client.IHyracksClientConnection; import 
org.apache.hyracks.api.client.NodeControllerInfo; import org.apache.hyracks.api.comm.IFrame; @@ -45,6 +53,7 @@ import org.apache.vxquery.context.DynamicContextImpl; import org.apache.vxquery.context.RootStaticContextImpl; import org.apache.vxquery.context.StaticContextImpl; +import org.apache.vxquery.datamodel.accessors.atomic.XSDateTimePointable; import org.apache.vxquery.exceptions.ErrorCode; import org.apache.vxquery.exceptions.SystemException; import org.apache.vxquery.result.ResultUtils; @@ -53,13 +62,14 @@ public class TestRunner { private static final Pattern EMBEDDED_SYSERROR_PATTERN = Pattern.compile("(\\p{javaUpperCase}{4}\\d{4})"); - + private List collectionList; private XTestOptions opts; private IHyracksClientConnection hcc; private IHyracksDataset hds; public TestRunner(XTestOptions opts) throws UnknownHostException { this.opts = opts; + this.collectionList = new ArrayList(); } public void open() throws Exception { @@ -67,8 +77,46 @@ public void open() throws Exception { hds = TestClusterUtil.getDataset(); } + protected static TestConfiguration getIndexConfiguration(TestCase testCase) { + XTestOptions opts = new XTestOptions(); + opts.verbose = false; + opts.threads = 1; + opts.showQuery = true; + opts.showResult = true; + opts.hdfsConf = "src/test/resources/hadoop/conf"; + opts.catalog = StringUtils.join(new String[] { "src", "test", "resources", "VXQueryCatalog.xml" }, + File.separator); + TestConfiguration indexConf = new TestConfiguration(); + indexConf.options = opts; + String baseDir = new File(opts.catalog).getParent(); + try { + String root = new File(baseDir).getCanonicalPath(); + indexConf.testRoot = new File(root + "/./"); + indexConf.resultOffsetPath = new File(root + "/./ExpectedResults/"); + indexConf.sourceFileMap = testCase.getSourceFileMap(); + indexConf.xqueryFileExtension = ".xq"; + indexConf.xqueryxFileExtension = "xqx"; + indexConf.xqueryQueryOffsetPath = new File(root + "/./Queries/XQuery/"); + } catch (IOException e) { + 
e.printStackTrace(); + } + return indexConf; + + } + public TestCaseResult run(final TestCase testCase) { TestCaseResult res = new TestCaseResult(testCase); + TestCase testCaseIndex = new TestCase(getIndexConfiguration(testCase)); + testCaseIndex.setFolder("Indexing/Partition-1/"); + testCaseIndex.setName("showIndexes"); + runQuery(testCaseIndex, res); + String[] collections = res.result.split("\n"); + this.collectionList = Arrays.asList(collections); + runQueries(testCase, res); + return res; + } + + public void runQuery(TestCase testCase, TestCaseResult res) { if (opts.verbose) { System.err.println("Starting " + testCase.getXQueryDisplayName()); } @@ -78,6 +126,7 @@ public TestCaseResult run(final TestCase testCase) { try { try { if (opts.showQuery) { + FileInputStream query = new FileInputStream(testCase.getXQueryFile()); System.err.println("***Query for " + testCase.getXQueryDisplayName() + ": "); System.err.println(IOUtils.toString(query, "UTF-8")); @@ -98,11 +147,21 @@ public TestCaseResult run(final TestCase testCase) { CompilerControlBlock ccb = new CompilerControlBlock( new StaticContextImpl(RootStaticContextImpl.INSTANCE), new ResultSetId(testCase.getXQueryDisplayName().hashCode()), testCase.getSourceFileMap()); - compiler.compile(testCase.getXQueryDisplayName(), in, ccb, opts.optimizationLevel); + compiler.compile(testCase.getXQueryDisplayName(), in, ccb, opts.optimizationLevel, collectionList); JobSpecification spec = compiler.getModule().getHyracksJobSpecification(); in.close(); DynamicContext dCtx = new DynamicContextImpl(compiler.getModule().getModuleContext()); + + if (opts.timezone != null) { + final int dtLen = XSDateTimePointable.TYPE_TRAITS.getFixedLength(); + byte[] currentDateTime = new byte[dtLen]; + XSDateTimePointable datetimep = new XSDateTimePointable(); + datetimep.set(currentDateTime, 0, dtLen); + datetimep.setCurrentDateTime(Calendar.getInstance(TimeZone.getTimeZone(opts.timezone))); + dCtx.setCurrentDateTime(datetimep); + } + 
spec.setGlobalJobDataFactory(new VXQueryGlobalDataFactory(dCtx.createFactory())); spec.setMaxReattempts(0); @@ -112,7 +171,7 @@ public TestCaseResult run(final TestCase testCase) { IFrame frame = new VSizeFrame(resultDisplayFrameMgr); IHyracksDatasetReader reader = hds.createReader(jobId, ccb.getResultSetId()); // TODO(tillw) remove this loop once the IHyracksDatasetReader reliably returns the correct exception - while (reader.getResultStatus() == DatasetJobRecord.Status.RUNNING) { + while (reader.getResultStatus().getState() == DatasetJobRecord.State.RUNNING) { Thread.sleep(1); } IFrameTupleAccessor frameTupleAccessor = new ResultFrameTupleAccessor(); @@ -172,7 +231,11 @@ public TestCaseResult run(final TestCase testCase) { System.err.println(res.result); } } - return res; + + } + + public void runQueries(TestCase testCase, TestCaseResult res) { + runQuery(testCase, res); } public void close() throws Exception { diff --git a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/XTestOptions.java b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/XTestOptions.java index 496b74a8f..34cd5b1ee 100644 --- a/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/XTestOptions.java +++ b/vxquery-xtest/src/main/java/org/apache/vxquery/xtest/XTestOptions.java @@ -29,6 +29,9 @@ public class XTestOptions { @Option(name = "-catalog", required = true, usage = "Test catalog XML.") String catalog; + @Option(name = "-tz", required = false, usage = "Implicit timezone for test execution (default: JVM timezone).") + String timezone; + @Option(name = "-threads", required = false, usage = "Number of threads. 
(default: 1)") int threads; diff --git a/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryIT.java b/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryIT.java index c6f54f636..e6dfa22d5 100644 --- a/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryIT.java +++ b/vxquery-xtest/src/test/java/org/apache/vxquery/xtest/VXQueryIT.java @@ -45,6 +45,7 @@ public static XTestOptions getOptions() { XTestOptions options = getDefaultTestOptions(); options.catalog = XQTS_CATALOG; options.previousTestResults = StringUtils.join(new String[] { "results", "xqts.txt" }, File.separator); + options.timezone = "PST"; return options; } diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-1/useIndex1_user.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-1/useIndex1_user.txt new file mode 100644 index 000000000..baf9dcade --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-1/useIndex1_user.txt @@ -0,0 +1,2 @@ +2003-03-03T00:00:00.000TMINGHCND:AS00000000313.75a +2003-03-03T00:00:00.000TMAXGHCND:AS00000000333a \ No newline at end of file diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-2/useIndex1_user.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-2/useIndex1_user.txt new file mode 100644 index 000000000..baf9dcade --- /dev/null +++ b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-2/useIndex1_user.txt @@ -0,0 +1,2 @@ +2003-03-03T00:00:00.000TMINGHCND:AS00000000313.75a +2003-03-03T00:00:00.000TMAXGHCND:AS00000000333a \ No newline at end of file diff --git a/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-4/useIndex1_user.txt b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-4/useIndex1_user.txt new file mode 100644 index 000000000..baf9dcade --- /dev/null +++ 
b/vxquery-xtest/src/test/resources/ExpectedTestResults/Indexing/Partition-4/useIndex1_user.txt @@ -0,0 +1,2 @@ +2003-03-03T00:00:00.000TMINGHCND:AS00000000313.75a +2003-03-03T00:00:00.000TMAXGHCND:AS00000000333a \ No newline at end of file diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex1.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex1.xq index 63fdda7ee..96a767146 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex1.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex1.xq @@ -16,10 +16,10 @@ under the License. :) (: Search Lucene Index :) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd")/dataCollection/data let $datetime := xs:dateTime(fn:data($r/date)) where $r/station eq "GHCND:AS000000003" and fn:year-from-dateTime($datetime) ge 2000 and fn:month-from-dateTime($datetime) eq 3 and fn:day-from-dateTime($datetime) eq 3 -return $r \ No newline at end of file +return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex1_user.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex1_user.xq new file mode 100644 index 000000000..7a2bb2c2a --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex1_user.xq @@ -0,0 +1,25 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: Search Lucene Index :) +for $r in collection-from-index("src/test/resources/TestSources/ghcnd")/dataCollection/data +let $datetime := xs:dateTime(fn:data($r/date)) +where $r/station eq "GHCND:AS000000003" + and fn:year-from-dateTime($datetime) ge 2000 + and fn:month-from-dateTime($datetime) eq 3 + and fn:day-from-dateTime($datetime) eq 3 +return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex2.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex2.xq index cf41536c1..464c1cc68 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex2.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex2.xq @@ -19,6 +19,6 @@ (: Find all reading for hurricane force wind warning or extreme wind warning. :) (: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744 :) (: meters per second). 
(Wind value is in tenth of a meter per second) :) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd")/dataCollection/data where $r/dataType eq "AWND" and xs:decimal($r/value) gt 491.744 return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex3.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex3.xq index 5c99d9aff..eeb019bc5 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex3.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex3.xq @@ -19,7 +19,7 @@ (: Find the annual precipitation (PRCP) for a Seattle using the airport :) (: station (US000000002) for 2002. :) fn:sum( - for $r in collection-from-index("src/test/resources/TestSources/ghcnd", "/dataCollection/data")/data + for $r in collection("src/test/resources/TestSources/ghcnd")/dataCollection/data where $r/station eq "GHCND:US000000002" and $r/dataType eq "PRCP" and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2002 diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex4.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex4.xq index 39e5d175b..06284aec8 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex4.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex4.xq @@ -18,7 +18,7 @@ (: Search Lucene Index :) (: Find the highest recorded temperature (TMAX) in Celsius. 
:) fn:max( - for $r in collection-from-index("src/test/resources/TestSources/ghcnd", "/dataCollection/data")/data + for $r in collection("src/test/resources/TestSources/ghcnd")/dataCollection/data where $r/dataType eq "TMAX" return $r/value ) div 10 diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex5.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex5.xq index 63aeca5e8..3a4ae0507 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex5.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex5.xq @@ -18,6 +18,6 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. :) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd")/dataCollection/data where xs:dateTime(fn:data($r/date)) eq xs:dateTime("2002-02-02T00:00:00.000") return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex6.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex6.xq index c81f27153..9090eddf7 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex6.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex6.xq @@ -18,7 +18,7 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. 
:) -for $s in collection-from-index("src/test/resources/TestSources/ghcnd", "/stationCollection/station")/station +for $s in collection("src/test/resources/TestSources/ghcnd")/stationCollection/station where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1")) order by $s/id return $s diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex7.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex7.xq index dd6b5f920..7c703b66b 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex7.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-1/useIndex7.xq @@ -18,8 +18,8 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. :) -for $s in collection-from-index("src/test/resources/TestSources/ghcnd", "/stationCollection/station")/station -for $r in collection-from-index("src/test/resources/TestSources/ghcnd", "/dataCollection/data")/data +for $s in collection("src/test/resources/TestSources/ghcnd")/stationCollection/station +for $r in collection("src/test/resources/TestSources/ghcnd")/dataCollection/data where $s/id eq $r/station and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1")) diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex1.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex1.xq index fecb56df7..471824032 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex1.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex1.xq @@ -16,7 +16,7 @@ under the License. 
:) (: Search Lucene Index :) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/dataCollection/data let $datetime := xs:dateTime(fn:data($r/date)) where $r/station eq "GHCND:AS000000003" and fn:year-from-dateTime($datetime) ge 2000 diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex1_user.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex1_user.xq new file mode 100644 index 000000000..0e42155a0 --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex1_user.xq @@ -0,0 +1,25 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
:) + +(: Search Lucene Index :) +for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/dataCollection/data +let $datetime := xs:dateTime(fn:data($r/date)) +where $r/station eq "GHCND:AS000000003" + and fn:year-from-dateTime($datetime) ge 2000 + and fn:month-from-dateTime($datetime) eq 3 + and fn:day-from-dateTime($datetime) eq 3 +return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex2.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex2.xq index 75c7a6475..37e5626bc 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex2.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex2.xq @@ -19,6 +19,6 @@ (: Find all reading for hurricane force wind warning or extreme wind warning. :) (: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744 :) (: meters per second). (Wind value is in tenth of a meter per second) :) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/dataCollection/data where $r/dataType eq "AWND" and xs:decimal($r/value) gt 491.744 return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex3.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex3.xq index 28f747367..358d3f38d 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex3.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex3.xq @@ -19,7 +19,7 @@ (: Find the annual precipitation (PRCP) for a Seattle using the airport :) (: station (US000000002) for 2002. 
:) fn:sum( - for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2", "/dataCollection/data")/data + for $r in collection("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/dataCollection/data where $r/station eq "GHCND:US000000002" and $r/dataType eq "PRCP" and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2002 diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex4.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex4.xq index 317a1416c..bd5ba1c8b 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex4.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex4.xq @@ -18,7 +18,7 @@ (: Search Lucene Index :) (: Find the highest recorded temperature (TMAX) in Celsius. :) fn:max( - for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2", "/dataCollection/data")/data + for $r in collection("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/dataCollection/data where $r/dataType eq "TMAX" return $r/value ) div 10 diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex5.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex5.xq index 2deb4c3ef..77f6c2f30 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex5.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex5.xq @@ -18,6 +18,6 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. 
:) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/dataCollection/data where xs:dateTime(fn:data($r/date)) eq xs:dateTime("2002-02-02T00:00:00.000") return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex6.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex6.xq index a0ce1e9ad..c1a45f433 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex6.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex6.xq @@ -18,6 +18,6 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. :) -for $s in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2", "/stationCollection/station")/station +for $s in collection("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/stationCollection/station where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1")) return $s diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex7.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex7.xq index b3e622cd6..a776ab920 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex7.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-2/useIndex7.xq @@ -18,8 +18,8 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. 
:) -for $s in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2", "/stationCollection/station")/station -for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2", "/dataCollection/data")/data +for $s in collection("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/stationCollection/station +for $r in collection("src/test/resources/TestSources/ghcnd/half_1|src/test/resources/TestSources/ghcnd/half_2")/dataCollection/data where $s/id eq $r/station and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1")) diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex1.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex1.xq index 0cccbc52a..2bc9ce759 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex1.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex1.xq @@ -16,7 +16,7 @@ under the License. 
:) (: Search Lucene Index :) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/dataCollection/data let $datetime := xs:dateTime(fn:data($r/date)) where $r/station eq "GHCND:AS000000003" and fn:year-from-dateTime($datetime) ge 2000 diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex1_user.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex1_user.xq new file mode 100644 index 000000000..e740365a2 --- /dev/null +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex1_user.xq @@ -0,0 +1,25 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
:) + +(: Search Lucene Index :) +for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/dataCollection/data +let $datetime := xs:dateTime(fn:data($r/date)) +where $r/station eq "GHCND:AS000000003" + and fn:year-from-dateTime($datetime) ge 2000 + and fn:month-from-dateTime($datetime) eq 3 + and fn:day-from-dateTime($datetime) eq 3 +return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex2.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex2.xq index c282e313f..a1b86acba 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex2.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex2.xq @@ -19,6 +19,6 @@ (: Find all reading for hurricane force wind warning or extreme wind warning. :) (: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744 :) (: meters per second). 
(Wind value is in tenth of a meter per second) :) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/dataCollection/data where $r/dataType eq "AWND" and xs:decimal($r/value) gt 491.744 return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex3.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex3.xq index 33ea1c954..9cc2b8eaf 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex3.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex3.xq @@ -19,7 +19,7 @@ (: Find the annual precipitation (PRCP) for a Seattle using the airport :) (: station (US000000002) for 2002. 
:) fn:sum( - for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4", "/dataCollection/data")/data + for $r in collection("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/dataCollection/data where $r/station eq "GHCND:US000000002" and $r/dataType eq "PRCP" and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2002 diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex4.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex4.xq index d213082aa..dd26e87cc 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex4.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex4.xq @@ -18,7 +18,7 @@ (: Search Lucene Index :) (: Find the highest recorded temperature (TMAX) in Celsius. 
:) fn:max( - for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4", "/dataCollection/data")/data + for $r in collection("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/dataCollection/data where $r/dataType eq "TMAX" return $r/value ) div 10 diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex5.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex5.xq index 1d98682ac..4a9d224bd 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex5.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex5.xq @@ -18,6 +18,6 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. 
:) -for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4", "/dataCollection/data")/data +for $r in collection("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/dataCollection/data where xs:dateTime(fn:data($r/date)) eq xs:dateTime("2002-02-02T00:00:00.000") return $r diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex6.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex6.xq index abe21840a..440707916 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex6.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex6.xq @@ -18,6 +18,6 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. 
:) -for $s in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4", "/stationCollection/station")/station +for $s in collection("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/stationCollection/station where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1")) return $s diff --git a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex7.xq b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex7.xq index 7b40ca00b..664b15034 100644 --- a/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex7.xq +++ b/vxquery-xtest/src/test/resources/Queries/XQuery/Indexing/Partition-4/useIndex7.xq @@ -18,8 +18,8 @@ (: Search Lucene Index :) (: Find all the weather readings for Washington state for a specific day :) (: 2002-2-2. 
:) -for $s in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4", "/stationCollection/station")/station -for $r in collection-from-index("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4", "/dataCollection/data")/data +for $s in collection("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/stationCollection/station +for $r in collection("src/test/resources/TestSources/ghcnd/half_1/quarter_1|src/test/resources/TestSources/ghcnd/half_1/quarter_2|src/test/resources/TestSources/ghcnd/half_2/quarter_3|src/test/resources/TestSources/ghcnd/half_2/quarter_4")/dataCollection/data where $s/id eq $r/station and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "STATE 1")) diff --git a/vxquery-xtest/src/test/resources/VXQueryCatalog.xml b/vxquery-xtest/src/test/resources/VXQueryCatalog.xml index 5ecdb9436..8e2813587 100644 --- a/vxquery-xtest/src/test/resources/VXQueryCatalog.xml +++ b/vxquery-xtest/src/test/resources/VXQueryCatalog.xml @@ -353,7 +353,7 @@ &LibrariesInJSONiq; - + Serialize Function Queries @@ -365,7 +365,7 @@ &SerializationQueries; - + XML in JSON diff --git a/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml b/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml index 7cf6bf667..cc6b65be5 100644 --- a/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml +++ b/vxquery-xtest/src/test/resources/cat/IndexingQueries.xml @@ -35,6 +35,11 @@ useIndex1.txt + + Get Collection From Lucene 
Index + + useIndex1_user.txt + Get Collection From Lucene Index @@ -95,6 +100,11 @@ useIndex1.txt + + Get Collection From Lucene Index + + useIndex1_user.txt + Get Collection From Lucene Index @@ -145,6 +155,11 @@ useIndex1.txt + + Get Collection From Lucene Index + + useIndex1_user.txt + Get Collection From Lucene Index