From 2b45ca47cc5024674ba148cefca5d87403300332 Mon Sep 17 00:00:00 2001 From: Tamas Mate Date: Wed, 7 Jan 2026 09:49:59 +0100 Subject: [PATCH] GH-946: Add Variant extension type support Implements VariantType extension type with VariantVector for storing variant data with metadata and value buffers. Includes reader/writer implementations and comprehensive test coverage. --- .../impl/NullableVariantHolderReaderImpl.java | 68 ++ .../complex/impl/VariantReaderImpl.java | 72 ++ .../complex/impl/VariantWriterFactory.java | 45 + .../complex/impl/VariantWriterImpl.java | 87 ++ .../arrow/vector/extension/VariantType.java | 80 ++ .../arrow/vector/extension/VariantVector.java | 306 +++++++ .../vector/holders/NullableVariantHolder.java | 48 + .../arrow/vector/holders/VariantHolder.java | 48 + .../apache/arrow/vector/TestVariantType.java | 308 +++++++ .../arrow/vector/TestVariantVector.java | 861 ++++++++++++++++++ 10 files changed, 1923 insertions(+) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java create mode 100644 vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/TestVariantType.java create mode 100644 vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java diff --git 
a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java new file mode 100644 index 000000000..0aa6246fb --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableVariantHolderReaderImpl.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.types.Types; + +public class NullableVariantHolderReaderImpl extends AbstractFieldReader { + private final NullableVariantHolder holder; + + public NullableVariantHolderReaderImpl(NullableVariantHolder holder) { + this.holder = holder; + } + + @Override + public int size() { + throw new UnsupportedOperationException("You can't call size on a Holder value reader."); + } + + @Override + public boolean next() { + throw new UnsupportedOperationException("You can't call next on a single value reader."); + } + + @Override + public void setPosition(int index) { + throw new UnsupportedOperationException("You can't call setPosition on a single value reader."); + } + + @Override + public Types.MinorType getMinorType() { + return Types.MinorType.EXTENSIONTYPE; + } + + @Override + public boolean isSet() { + return holder.isSet == 1; + } + + /** + * Reads the variant holder data into the provided holder. + * + * @param h the holder to read into + */ + public void read(NullableVariantHolder h) { + h.metadataStart = this.holder.metadataStart; + h.metadataEnd = this.holder.metadataEnd; + h.metadataBuffer = this.holder.metadataBuffer; + h.valueStart = this.holder.valueStart; + h.valueEnd = this.holder.valueEnd; + h.valueBuffer = this.holder.valueBuffer; + h.isSet = this.isSet() ? 1 : 0; + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java new file mode 100644 index 000000000..cf5509792 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantReaderImpl.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.holders.VariantHolder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.Field; + +public class VariantReaderImpl extends AbstractFieldReader { + private final VariantVector vector; + + public VariantReaderImpl(VariantVector vector) { + this.vector = vector; + } + + @Override + public Types.MinorType getMinorType() { + return this.vector.getMinorType(); + } + + @Override + public Field getField() { + return this.vector.getField(); + } + + @Override + public boolean isSet() { + return !this.vector.isNull(this.idx()); + } + + @Override + public void read(ExtensionHolder holder) { + if (holder instanceof VariantHolder) { + vector.get(idx(), (VariantHolder) holder); + } else if (holder instanceof NullableVariantHolder) { + vector.get(idx(), (NullableVariantHolder) holder); + } else { + throw new IllegalArgumentException( + "Unsupported holder type for VariantReader: " + holder.getClass()); + } + } + + public void read(VariantHolder h) { + this.vector.get(this.idx(), h); + } + + public void 
read(NullableVariantHolder h) { + this.vector.get(this.idx(), h); + } + + @Override + public Object readObject() { + return this.vector.getObject(this.idx()); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java new file mode 100644 index 000000000..6d41b890e --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterFactory.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.extension.VariantVector; + +/** + * Factory for creating {@link VariantWriterImpl} instances. + * + *
<p>
This factory is used to create writers for Variant extension type vectors. + * + * @see VariantWriterImpl + * @see org.apache.arrow.vector.extension.VariantType + */ +public class VariantWriterFactory implements ExtensionTypeWriterFactory { + + /** + * Creates a writer implementation for the given extension type vector. + * + * @param extensionTypeVector the vector to create a writer for + * @return a {@link VariantWriterImpl} if the vector is a {@link VariantVector}, null otherwise + */ + @Override + public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { + if (extensionTypeVector instanceof VariantVector) { + return new VariantWriterImpl((VariantVector) extensionTypeVector); + } + return null; + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java new file mode 100644 index 000000000..ac5250283 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/VariantWriterImpl.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.holders.VariantHolder; + +/** + * Writer implementation for VARIANT extension type vectors. + * + *
<p>
This writer handles writing variant data to a {@link VariantVector}. It accepts both {@link + * VariantHolder} and {@link NullableVariantHolder} objects containing metadata and value buffers + * and writes them to the appropriate position in the vector. + */ +public class VariantWriterImpl extends AbstractExtensionTypeWriter { + + private static final String UNSUPPORTED_TYPE_TEMPLATE = "Unsupported type for Variant: %s"; + + /** + * Constructs a new VariantWriterImpl for the given vector. + * + * @param vector the variant vector to write to + */ + public VariantWriterImpl(VariantVector vector) { + super(vector); + } + + /** + * Writes an extension type value to the vector. + * + *
<p>
This method validates that the object is an {@link ExtensionHolder} and delegates to {@link + * #write(ExtensionHolder)}. + * + * @param object the object to write, must be a non-null {@link ExtensionHolder} + * @throws IllegalArgumentException if the object is null or not an {@link ExtensionHolder} + */ + @Override + public void writeExtension(Object object) { + if (object instanceof ExtensionHolder) { + write((ExtensionHolder) object); + } else { + String typeName = object == null ? "null" : object.getClass().getName(); + throw new IllegalArgumentException(String.format(UNSUPPORTED_TYPE_TEMPLATE, typeName)); + } + } + + /** + * Writes a variant holder to the vector at the current position. + * + *
<p>
The holder can be either a {@link VariantHolder} (non-nullable, always set) or a {@link + * NullableVariantHolder} (nullable, may be null). The data is written using {@link + * VariantVector#setSafe(int, NullableVariantHolder)} which handles buffer allocation and copying. + * + * @param extensionHolder the variant holder to write, must be a {@link VariantHolder} or {@link + * NullableVariantHolder} + * @throws IllegalArgumentException if the holder is neither a {@link VariantHolder} nor a {@link + * NullableVariantHolder} + */ + @Override + public void write(ExtensionHolder extensionHolder) { + if (extensionHolder instanceof VariantHolder) { + vector.setSafe(getPosition(), (VariantHolder) extensionHolder); + } else if (extensionHolder instanceof NullableVariantHolder) { + vector.setSafe(getPosition(), (NullableVariantHolder) extensionHolder); + } else { + throw new IllegalArgumentException( + String.format(UNSUPPORTED_TYPE_TEMPLATE, extensionHolder.getClass().getName())); + } + vector.setValueCount(getPosition() + 1); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java b/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java new file mode 100644 index 000000000..cf083fc49 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/extension/VariantType.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.extension; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.FieldType; + +public final class VariantType extends ExtensionType { + + public static final VariantType INSTANCE = new VariantType(); + + public static final String EXTENSION_NAME = "parquet.variant"; + + static { + ExtensionTypeRegistry.register(INSTANCE); + } + + private VariantType() {} + + @Override + public ArrowType storageType() { + return ArrowType.Struct.INSTANCE; + } + + @Override + public String extensionName() { + return EXTENSION_NAME; + } + + @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof VariantType; + } + + @Override + public String serialize() { + return ""; + } + + @Override + public ArrowType deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(this.storageType())) { + throw new UnsupportedOperationException( + "Cannot construct VariantType from underlying type " + storageType); + } + return INSTANCE; + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return new VariantVector(name, allocator); + } + + @Override + public boolean isComplex() { + // The type itself is not complex meaning we need separate functions to 
convert/extract + // different types. + // Meanwhile, the containing vector is complex in terms of containing multiple values (metadata + // and value) + return false; + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java b/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java new file mode 100644 index 000000000..cc087c4f8 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/extension/VariantVector.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.extension; + +import java.util.List; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.complex.AbstractStructVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.holders.VariantHolder; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.TransferPair; + +public class VariantVector extends ExtensionTypeVector { + + public static final String METADATA_VECTOR_NAME = "metadata"; + public static final String VALUE_VECTOR_NAME = "value"; + + private final Field rootField; + + /** + * Constructs a new VariantVector with the given name and allocator. + * + * @param name the name of the vector + * @param allocator the buffer allocator for memory management + */ + public VariantVector(String name, BufferAllocator allocator) { + super( + name, + allocator, + new StructVector( + name, + allocator, + FieldType.nullable(ArrowType.Struct.INSTANCE), + null, + AbstractStructVector.ConflictPolicy.CONFLICT_ERROR, + false)); + rootField = createVariantField(name); + ((FieldVector) this.getUnderlyingVector()) + .initializeChildrenFromFields(rootField.getChildren()); + } + + /** + * Creates a new VariantVector with the given name. 
The Variant Field schema has to be the same + * everywhere, otherwise ArrowBuffer loading might fail during serialization/deserialization and + * schema mismatches can occur. This includes CompleteType's VARIANT and VARIANT_REQUIRED types. + */ + public static Field createVariantField(String name) { + return new Field( + name, new FieldType(true, VariantType.INSTANCE, null), createVariantChildFields()); + } + + /** + * Creates the child fields for the VariantVector. Metadata vector will be index 0 and value + * vector will be index 1. + */ + public static List createVariantChildFields() { + return List.of( + new Field(METADATA_VECTOR_NAME, new FieldType(false, Binary.INSTANCE, null), null), + new Field(VALUE_VECTOR_NAME, new FieldType(false, Binary.INSTANCE, null), null)); + } + + @Override + public void initializeChildrenFromFields(List children) { + // No-op, as children are initialized in the constructor + } + + @Override + public Field getField() { + return rootField; + } + + public VarBinaryVector getMetadataVector() { + return getUnderlyingVector().getChild(METADATA_VECTOR_NAME, VarBinaryVector.class); + } + + public VarBinaryVector getValueVector() { + return getUnderlyingVector().getChild(VALUE_VECTOR_NAME, VarBinaryVector.class); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + return new VariantTransferPair(this, (VariantVector) target); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new VariantTransferPair(this, new VariantVector(field.getName(), allocator)); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return new VariantTransferPair(this, new VariantVector(ref, allocator)); + } + + @Override + public TransferPair getTransferPair(String ref, 
BufferAllocator allocator, CallBack callBack) { + return getTransferPair(ref, allocator); + } + + @Override + public TransferPair getTransferPair(BufferAllocator allocator) { + return getTransferPair(this.getField().getName(), allocator); + } + + @Override + public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFrom(fromIndex, thisIndex, ((VariantVector) from).getUnderlyingVector()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((VariantVector) from).getUnderlyingVector()); + } + + @Override + public Object getObject(int index) { + return getUnderlyingVector().getObject(index); + } + + /** + * Retrieves the variant value at the specified index into the provided holder. + * + * @param index the index of the value to retrieve + * @param holder the holder to populate with the variant data + */ + public void get(int index, NullableVariantHolder holder) { + if (isNull(index)) { + holder.isSet = 0; + } else { + holder.isSet = 1; + VarBinaryVector metadataVector = getMetadataVector(); + VarBinaryVector valueVector = getValueVector(); + assert !metadataVector.isNull(index) && !valueVector.isNull(index); + + holder.metadataStart = metadataVector.getStartOffset(index); + holder.metadataEnd = metadataVector.getEndOffset(index); + holder.metadataBuffer = metadataVector.getDataBuffer(); + holder.valueStart = valueVector.getStartOffset(index); + holder.valueEnd = valueVector.getEndOffset(index); + holder.valueBuffer = valueVector.getDataBuffer(); + } + } + + /** + * Retrieves the variant value at the specified index into the provided non-nullable holder. 
+ * + * @param index the index of the value to retrieve + * @param holder the holder to populate with the variant data + */ + public void get(int index, VariantHolder holder) { + VarBinaryVector metadataVector = getMetadataVector(); + VarBinaryVector valueVector = getValueVector(); + assert !metadataVector.isNull(index) && !valueVector.isNull(index); + + holder.metadataStart = metadataVector.getStartOffset(index); + holder.metadataEnd = metadataVector.getEndOffset(index); + holder.metadataBuffer = metadataVector.getDataBuffer(); + holder.valueStart = valueVector.getStartOffset(index); + holder.valueEnd = valueVector.getEndOffset(index); + holder.valueBuffer = valueVector.getDataBuffer(); + } + + /** + * Sets the variant value at the specified index from the provided holder. + * + * @param index the index at which to set the value + * @param holder the holder containing the variant data to set + */ + public void set(int index, VariantHolder holder) { + BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index); + getMetadataVector() + .set(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().set(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided nullable holder. + * + * @param index the index at which to set the value + * @param holder the nullable holder containing the variant data to set + */ + public void set(int index, NullableVariantHolder holder) { + BitVectorHelper.setValidityBit(getUnderlyingVector().getValidityBuffer(), index, holder.isSet); + if (holder.isSet == 0) { + return; + } + getMetadataVector() + .set(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().set(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided holder, with bounds checking. 
+ * + * @param index the index at which to set the value + * @param holder the holder containing the variant data to set + */ + public void setSafe(int index, VariantHolder holder) { + getUnderlyingVector().setIndexDefined(index); + getMetadataVector() + .setSafe(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().setSafe(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + /** + * Sets the variant value at the specified index from the provided nullable holder, with bounds + * checking. + * + * @param index the index at which to set the value + * @param holder the nullable holder containing the variant data to set + */ + public void setSafe(int index, NullableVariantHolder holder) { + if (holder.isSet == 0) { + getUnderlyingVector().setNull(index); + return; + } + getUnderlyingVector().setIndexDefined(index); + getMetadataVector() + .setSafe(index, 1, holder.metadataStart, holder.metadataEnd, holder.metadataBuffer); + getValueVector().setSafe(index, 1, holder.valueStart, holder.valueEnd, holder.valueBuffer); + } + + @Override + protected FieldReader getReaderImpl() { + return new org.apache.arrow.vector.complex.impl.VariantReaderImpl(this); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + /** + * VariantTransferPair is a transfer pair for VariantVector. It transfers the metadata and value + * together using the underlyingVector's transfer pair. 
+ */ + protected static class VariantTransferPair implements TransferPair { + private final TransferPair pair; + private final VariantVector from; + private final VariantVector to; + + public VariantTransferPair(VariantVector from, VariantVector to) { + this.from = from; + this.to = to; + this.pair = from.getUnderlyingVector().makeTransferPair((to).getUnderlyingVector()); + } + + @Override + public void transfer() { + pair.transfer(); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + pair.splitAndTransfer(startIndex, length); + } + + @Override + public ValueVector getTo() { + return to; + } + + @Override + public void copyValueSafe(int from, int to) { + pair.copyValueSafe(from, to); + } + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java new file mode 100644 index 000000000..6e80ce341 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableVariantHolder.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.holders; + +import org.apache.arrow.memory.ArrowBuf; + +@SuppressWarnings("checkstyle:VisibilityModifier") +public final class NullableVariantHolder extends ExtensionHolder { + + public int isSet; + public int metadataStart; + public int metadataEnd; + public ArrowBuf metadataBuffer; + public int valueStart; + public int valueEnd; + public ArrowBuf valueBuffer; + + public NullableVariantHolder() {} + + @Override + public boolean equals(Object obj) { + throw new UnsupportedOperationException(); + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + throw new UnsupportedOperationException(); + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java new file mode 100644 index 000000000..1cf90f9c8 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/holders/VariantHolder.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.holders; + +import org.apache.arrow.memory.ArrowBuf; + +@SuppressWarnings("checkstyle:VisibilityModifier") +public final class VariantHolder extends ExtensionHolder { + + public final int isSet = 1; + public int metadataStart; + public int metadataEnd; + public ArrowBuf metadataBuffer; + public int valueStart; + public int valueEnd; + public ArrowBuf valueBuffer; + + public VariantHolder() {} + + @Override + public boolean equals(Object obj) { + throw new UnsupportedOperationException(); + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + throw new UnsupportedOperationException(); + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariantType.java b/vector/src/test/java/org/apache/arrow/vector/TestVariantType.java new file mode 100644 index 000000000..115d9f46d --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/TestVariantType.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.extension.VariantType; +import org.apache.arrow.vector.extension.VariantVector; +import org.apache.arrow.vector.holders.NullableVariantHolder; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class TestVariantType { + BufferAllocator allocator; + + @BeforeEach + void beforeEach() { + allocator = new RootAllocator(); + } + + @AfterEach + void afterEach() { + allocator.close(); + } + + @Test + void testConstants() { + assertNotNull(VariantType.INSTANCE); + } + + @Test + void 
testStorageType() { + VariantType type = VariantType.INSTANCE; + assertEquals(ArrowType.Struct.INSTANCE, type.storageType()); + assertInstanceOf(ArrowType.Struct.class, type.storageType()); + } + + @Test + void testExtensionName() { + VariantType type = VariantType.INSTANCE; + assertEquals("parquet.variant", type.extensionName()); + } + + @Test + void testExtensionEquals() { + VariantType type1 = VariantType.INSTANCE; + VariantType type2 = VariantType.INSTANCE; + + assertTrue(type1.extensionEquals(type2)); + } + + @Test + void testIsComplex() { + VariantType type = VariantType.INSTANCE; + assertFalse(type.isComplex()); + } + + @Test + void testSerialize() { + VariantType type = VariantType.INSTANCE; + String serialized = type.serialize(); + assertEquals("", serialized); + } + + @Test + void testDeserializeValid() { + VariantType type = VariantType.INSTANCE; + ArrowType storageType = ArrowType.Struct.INSTANCE; + + ArrowType deserialized = assertDoesNotThrow(() -> type.deserialize(storageType, "")); + assertInstanceOf(VariantType.class, deserialized); + assertEquals(VariantType.INSTANCE, deserialized); + } + + @Test + void testDeserializeInvalidStorageType() { + VariantType type = VariantType.INSTANCE; + ArrowType wrongStorageType = ArrowType.Utf8.INSTANCE; + + assertThrows(UnsupportedOperationException.class, () -> type.deserialize(wrongStorageType, "")); + } + + @Test + void testGetNewVector() { + VariantType type = VariantType.INSTANCE; + try (FieldVector vector = + type.getNewVector("variant_field", FieldType.nullable(type), allocator)) { + assertInstanceOf(VariantVector.class, vector); + assertEquals("variant_field", vector.getField().getName()); + assertEquals(type, vector.getField().getType()); + } + } + + @Test + void testGetNewVectorWithNullableFieldType() { + VariantType type = VariantType.INSTANCE; + FieldType nullableFieldType = FieldType.nullable(type); + + try (FieldVector vector = type.getNewVector("nullable_variant", nullableFieldType, allocator)) { + 
assertInstanceOf(VariantVector.class, vector); + assertEquals("nullable_variant", vector.getField().getName()); + assertTrue(vector.getField().isNullable()); + } + } + + @Test + void testGetNewVectorWithNonNullableFieldType() { + VariantType type = VariantType.INSTANCE; + FieldType nonNullableFieldType = FieldType.notNullable(type); + + try (FieldVector vector = + type.getNewVector("non_nullable_variant", nonNullableFieldType, allocator)) { + assertInstanceOf(VariantVector.class, vector); + assertEquals("non_nullable_variant", vector.getField().getName()); + } + } + + @Test + void testIpcRoundTrip() { + VariantType type = VariantType.INSTANCE; + + Schema schema = new Schema(Collections.singletonList(Field.nullable("variant", type))); + byte[] serialized = schema.serializeAsMessage(); + Schema deserialized = Schema.deserializeMessage(ByteBuffer.wrap(serialized)); + assertEquals(schema, deserialized); + } + + @Test + void testVectorIpcRoundTrip() throws IOException { + VariantType type = VariantType.INSTANCE; + + try (FieldVector vector = type.getNewVector("field", FieldType.nullable(type), allocator); + ArrowBuf metadataBuf1 = allocator.buffer(10); + ArrowBuf valueBuf1 = allocator.buffer(10); + ArrowBuf metadataBuf2 = allocator.buffer(10); + ArrowBuf valueBuf2 = allocator.buffer(10)) { + VariantVector variantVector = (VariantVector) vector; + + byte[] metadata1 = new byte[] {1, 2, 3}; + byte[] value1 = new byte[] {4, 5, 6, 7}; + metadataBuf1.setBytes(0, metadata1); + valueBuf1.setBytes(0, value1); + + byte[] metadata2 = new byte[] {8, 9}; + byte[] value2 = new byte[] {10, 11, 12}; + metadataBuf2.setBytes(0, metadata2); + valueBuf2.setBytes(0, value2); + + NullableVariantHolder holder1 = new NullableVariantHolder(); + holder1.isSet = 1; + holder1.metadataStart = 0; + holder1.metadataEnd = metadata1.length; + holder1.metadataBuffer = metadataBuf1; + holder1.valueStart = 0; + holder1.valueEnd = value1.length; + holder1.valueBuffer = valueBuf1; + + NullableVariantHolder 
holder2 = new NullableVariantHolder(); + holder2.isSet = 1; + holder2.metadataStart = 0; + holder2.metadataEnd = metadata2.length; + holder2.metadataBuffer = metadataBuf2; + holder2.valueStart = 0; + holder2.valueEnd = value2.length; + holder2.valueBuffer = valueBuf2; + + variantVector.setSafe(0, holder1); + variantVector.setNull(1); + variantVector.setSafe(2, holder2); + variantVector.setValueCount(3); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (VectorSchemaRoot root = new VectorSchemaRoot(Collections.singletonList(variantVector)); + ArrowStreamWriter writer = + new ArrowStreamWriter(root, new DictionaryProvider.MapDictionaryProvider(), baos)) { + writer.start(); + writer.writeBatch(); + } + + try (ArrowStreamReader reader = + new ArrowStreamReader(new ByteArrayInputStream(baos.toByteArray()), allocator)) { + assertTrue(reader.loadNextBatch()); + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + assertEquals(3, root.getRowCount()); + assertEquals( + new Schema(Collections.singletonList(variantVector.getField())), root.getSchema()); + + VariantVector actual = assertInstanceOf(VariantVector.class, root.getVector("field")); + assertFalse(actual.isNull(0)); + assertTrue(actual.isNull(1)); + assertFalse(actual.isNull(2)); + + NullableVariantHolder result1 = new NullableVariantHolder(); + actual.get(0, result1); + assertEquals(1, result1.isSet); + assertEquals(metadata1.length, result1.metadataEnd - result1.metadataStart); + assertEquals(value1.length, result1.valueEnd - result1.valueStart); + + assertNull(actual.getObject(1)); + + NullableVariantHolder result2 = new NullableVariantHolder(); + actual.get(2, result2); + assertEquals(1, result2.isSet); + assertEquals(metadata2.length, result2.metadataEnd - result2.metadataStart); + assertEquals(value2.length, result2.valueEnd - result2.valueStart); + } + } + } + + @Test + void testSingleton() { + VariantType type1 = VariantType.INSTANCE; + VariantType type2 = VariantType.INSTANCE; + + // 
Same instance + assertSame(type1, type2); + assertTrue(type1.extensionEquals(type2)); + } + + @Test + void testExtensionTypeRegistry() { + // VariantType should be automatically registered via static initializer + ArrowType.ExtensionType registeredType = + ExtensionTypeRegistry.lookup(VariantType.EXTENSION_NAME); + assertNotNull(registeredType); + assertInstanceOf(VariantType.class, registeredType); + assertEquals(VariantType.INSTANCE, registeredType); + } + + @Test + void testFieldMetadata() { + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + + FieldType fieldType = new FieldType(true, VariantType.INSTANCE, null, metadata); + try (VariantVector vector = new VariantVector("test", allocator)) { + Field field = new Field("test", fieldType, VariantVector.createVariantChildFields()); + + // Field metadata includes both custom metadata and extension type metadata + Map fieldMetadata = field.getMetadata(); + assertEquals("value1", fieldMetadata.get("key1")); + assertEquals("value2", fieldMetadata.get("key2")); + // Extension type metadata is also present + assertTrue(fieldMetadata.containsKey("ARROW:extension:name")); + assertTrue(fieldMetadata.containsKey("ARROW:extension:metadata")); + } + } + + @Test + void testFieldChildren() { + try (VariantVector vector = new VariantVector("test", allocator)) { + Field field = vector.getField(); + + assertNotNull(field.getChildren()); + assertEquals(2, field.getChildren().size()); + + Field metadataField = field.getChildren().get(0); + assertEquals(VariantVector.METADATA_VECTOR_NAME, metadataField.getName()); + assertEquals(ArrowType.Binary.INSTANCE, metadataField.getType()); + + Field valueField = field.getChildren().get(1); + assertEquals(VariantVector.VALUE_VECTOR_NAME, valueField.getName()); + assertEquals(ArrowType.Binary.INSTANCE, valueField.getType()); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java 
b/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java new file mode 100644 index 000000000..5774e67c6 --- /dev/null +++ b/vector/src/test/java/org/apache/arrow/vector/TestVariantVector.java @@ -0,0 +1,861 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.arrow.vector;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.impl.VariantReaderImpl;
+import org.apache.arrow.vector.complex.impl.VariantWriterFactory;
+import org.apache.arrow.vector.complex.impl.VariantWriterImpl;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.extension.VariantVector;
+import org.apache.arrow.vector.holders.ExtensionHolder;
+import org.apache.arrow.vector.holders.NullableVariantHolder;
+import org.apache.arrow.vector.holders.VariantHolder;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/** Tests for VariantVector, VariantWriterImpl, and VariantReaderImpl. */
+class TestVariantVector {
+
+  private BufferAllocator allocator;
+
+  /** Gives every test a fresh root allocator. */
+  @BeforeEach
+  void beforeEach() {
+    allocator = new RootAllocator();
+  }
+
+  /** Closes the per-test allocator. */
+  @AfterEach
+  void afterEach() {
+    allocator.close();
+  }
+
+  /**
+   * Builds a non-nullable holder over the given buffers. Both ranges start at offset 0 and end at
+   * the length of the corresponding byte array; the arrays are used only for their lengths.
+   */
+  private VariantHolder createHolder(
+      ArrowBuf metadataBuf, byte[] metadata, ArrowBuf valueBuf, byte[] value) {
+    VariantHolder holder = new VariantHolder();
+    holder.metadataStart = 0;
+    holder.metadataEnd = metadata.length;
+    holder.metadataBuffer = metadataBuf;
+    holder.valueStart = 0;
+    holder.valueEnd = value.length;
+    holder.valueBuffer = valueBuf;
+    return holder;
+  }
+
+  /**
+   * Builds a nullable holder marked as set ({@code isSet = 1}) over the given buffers, with the
+   * same offset layout as {@link #createHolder}.
+   */
+  private NullableVariantHolder createNullableHolder(
+      ArrowBuf metadataBuf, byte[] metadata, ArrowBuf valueBuf, byte[] value) {
+    NullableVariantHolder holder = new NullableVariantHolder();
+    holder.isSet = 1;
+    holder.metadataStart = 0;
+    holder.metadataEnd = metadata.length;
+    holder.metadataBuffer = metadataBuf;
+    holder.valueStart = 0;
+    holder.valueEnd = value.length;
+    holder.valueBuffer = valueBuf;
+    return holder;
+  }
+
+  /** Builds a nullable holder marked as null ({@code isSet = 0}) with no buffers attached. */
+  private NullableVariantHolder createNullHolder() {
+    NullableVariantHolder holder = new NullableVariantHolder();
+    holder.isSet = 0;
+    return holder;
+  }
+
+  // ========== Basic Vector Tests ==========
+
+  /** A new vector exposes its field name and both child vectors. */
+  @Test
+  void testVectorCreation() {
+    try (VariantVector vector = new VariantVector("test", allocator)) {
+      assertNotNull(vector);
+      assertEquals("test", vector.getField().getName());
+      assertNotNull(vector.getMetadataVector());
+      assertNotNull(vector.getValueVector());
+    }
+  }
+
+  /** A value written with {@code setSafe} is read back with identical metadata and value bytes. */
+  @Test
+  void testSetAndGet() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2, 3};
+      byte[] value = new byte[] {4, 5, 6, 7};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      vector.setSafe(0, holder);
+      vector.setValueCount(1);
+
+      // Retrieve and verify
+      NullableVariantHolder result = new NullableVariantHolder();
+      vector.get(0, result);
+
+      assertEquals(1, result.isSet);
+      assertEquals(metadata.length, result.metadataEnd - result.metadataStart);
+      assertEquals(value.length, result.valueEnd - result.valueStart);
+
+      byte[] actualMetadata = new byte[metadata.length];
+      byte[] actualValue = new byte[value.length];
+      result.metadataBuffer.getBytes(result.metadataStart, actualMetadata);
+      result.valueBuffer.getBytes(result.valueStart, actualValue);
+
+      assertArrayEquals(metadata, actualMetadata);
+      assertArrayEquals(value, actualValue);
+    }
+  }
+
+  /** Writing a holder with {@code isSet = 0} marks the slot null and reads back as unset. */
+  @Test
+  void testSetNull() {
+    try (VariantVector vector = new VariantVector("test", allocator)) {
+      NullableVariantHolder holder = createNullHolder();
+
+      vector.setSafe(0, holder);
+      vector.setValueCount(1);
+
+      assertTrue(vector.isNull(0));
+
+      NullableVariantHolder result = new NullableVariantHolder();
+      vector.get(0, result);
+      assertEquals(0, result.isSet);
+    }
+  }
+
+  /** Two values of different lengths stored in adjacent slots are read back independently. */
+  @Test
+  void testMultipleValues() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        ArrowBuf metadataBuf1 = allocator.buffer(10);
+        ArrowBuf valueBuf1 = allocator.buffer(10);
+        ArrowBuf metadataBuf2 = allocator.buffer(10);
+        ArrowBuf valueBuf2 = allocator.buffer(10)) {
+
+      byte[] metadata1 = new byte[] {1, 2};
+      byte[] value1 = new byte[] {3, 4, 5};
+      metadataBuf1.setBytes(0, metadata1);
+      valueBuf1.setBytes(0, value1);
+
+      NullableVariantHolder holder1 =
+          createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1);
+
+      byte[] metadata2 = new byte[] {6, 7, 8};
+      byte[] value2 = new byte[] {9, 10};
+      metadataBuf2.setBytes(0, metadata2);
+      valueBuf2.setBytes(0, value2);
+
+      NullableVariantHolder holder2 =
+          createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2);
+
+      vector.setSafe(0, holder1);
+      vector.setSafe(1, holder2);
+      vector.setValueCount(2);
+
+      // Verify first value
+      NullableVariantHolder result1 = new NullableVariantHolder();
+      vector.get(0, result1);
+      assertEquals(1, result1.isSet);
+
+      byte[] actualMetadata1 = new byte[metadata1.length];
+      byte[] actualValue1 = new byte[value1.length];
+      result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1);
+      result1.valueBuffer.getBytes(result1.valueStart, actualValue1);
+      assertArrayEquals(metadata1, actualMetadata1);
+      assertArrayEquals(value1, actualValue1);
+
+      // Verify second value
+      NullableVariantHolder result2 = new NullableVariantHolder();
+      vector.get(1, result2);
+      assertEquals(1, result2.isSet);
+
+      byte[] actualMetadata2 = new byte[metadata2.length];
+      byte[] actualValue2 = new byte[value2.length];
+      result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2);
+      result2.valueBuffer.getBytes(result2.valueStart, actualValue2);
+      assertArrayEquals(metadata2, actualMetadata2);
+      assertArrayEquals(value2, actualValue2);
+    }
+  }
+
+  /** A non-nullable {@link VariantHolder} can be written and yields a non-null slot. */
+  @Test
+  void testNonNullableHolder() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2, 3};
+      byte[] value = new byte[] {4, 5, 6};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      VariantHolder holder = createHolder(metadataBuf, metadata, valueBuf, value);
+
+      vector.setSafe(0, holder);
+      vector.setValueCount(1);
+
+      assertFalse(vector.isNull(0));
+
+      NullableVariantHolder result = new NullableVariantHolder();
+      vector.get(0, result);
+      assertEquals(1, result.isSet);
+    }
+  }
+
+  // ========== Writer Tests ==========
+
+  /** Writing a {@link VariantHolder} through the writer stores one non-null value. */
+  @Test
+  void testWriteWithVariantHolder() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        VariantWriterImpl writer = new VariantWriterImpl(vector);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2};
+      byte[] value = new byte[] {3, 4, 5};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      VariantHolder holder = createHolder(metadataBuf, metadata, valueBuf, value);
+
+      writer.setPosition(0);
+      writer.write(holder);
+
+      assertEquals(1, vector.getValueCount());
+      assertFalse(vector.isNull(0));
+    }
+  }
+
+  /** Writing a set {@link NullableVariantHolder} through the writer stores one non-null value. */
+  @Test
+  void testWriteWithNullableVariantHolder() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        VariantWriterImpl writer = new VariantWriterImpl(vector);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2};
+      byte[] value = new byte[] {3, 4, 5};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      writer.setPosition(0);
+      writer.write(holder);
+
+      assertEquals(1, vector.getValueCount());
+      assertFalse(vector.isNull(0));
+    }
+  }
+
+  /** Writing an unset {@link NullableVariantHolder} through the writer stores one null value. */
+  @Test
+  void testWriteWithNullableVariantHolderNull() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        VariantWriterImpl writer = new VariantWriterImpl(vector)) {
+
+      NullableVariantHolder holder = createNullHolder();
+
+      writer.setPosition(0);
+      writer.write(holder);
+
+      assertEquals(1, vector.getValueCount());
+      assertTrue(vector.isNull(0));
+    }
+  }
+
+  /** {@code writeExtension} rejects an object that is not a variant holder. */
+  @Test
+  void testWriteExtensionWithUnsupportedType() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        VariantWriterImpl writer = new VariantWriterImpl(vector)) {
+
+      writer.setPosition(0);
+
+      IllegalArgumentException exception =
+          assertThrows(IllegalArgumentException.class, () -> writer.writeExtension("invalid-type"));
+
+      assertTrue(exception.getMessage().contains("Unsupported type for Variant"));
+    }
+  }
+
+  /** {@code write} rejects an {@link ExtensionHolder} that is not a variant holder. */
+  @Test
+  void testWriteWithUnsupportedHolder() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        VariantWriterImpl writer = new VariantWriterImpl(vector)) {
+
+      // Anonymous subclass: an ExtensionHolder that is neither of the two variant holder types.
+      ExtensionHolder unsupportedHolder = new ExtensionHolder() {};
+
+      writer.setPosition(0);
+
+      IllegalArgumentException exception =
+          assertThrows(IllegalArgumentException.class, () -> writer.write(unsupportedHolder));
+
+      assertTrue(exception.getMessage().contains("Unsupported type for Variant"));
+    }
+  }
+
+  // ========== Reader Tests ==========
+
+  /** The reader fills a {@link NullableVariantHolder} with the stored range lengths. */
+  @Test
+  void testReaderReadWithNullableVariantHolder() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2, 3};
+      byte[] value = new byte[] {4, 5, 6};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      vector.setSafe(0, holder);
+      vector.setValueCount(1);
+
+      VariantReaderImpl reader = (VariantReaderImpl) vector.getReader();
+      reader.setPosition(0);
+
+      NullableVariantHolder result = new NullableVariantHolder();
+      reader.read(result);
+
+      assertEquals(1, result.isSet);
+      assertEquals(metadata.length, result.metadataEnd - result.metadataStart);
+      assertEquals(value.length, result.valueEnd - result.valueStart);
+    }
+  }
+
+  /** Reading a null slot into a {@link NullableVariantHolder} leaves it unset. */
+  @Test
+  void testReaderReadWithNullableVariantHolderNull() {
+    try (VariantVector vector = new VariantVector("test", allocator)) {
+      vector.setNull(0);
+      vector.setValueCount(1);
+
+      VariantReaderImpl reader = (VariantReaderImpl) vector.getReader();
+      reader.setPosition(0);
+
+      NullableVariantHolder holder = new NullableVariantHolder();
+      reader.read(holder);
+
+      assertEquals(0, holder.isSet);
+    }
+  }
+
+  /** {@code isSet} on the reader follows the validity of the slot at the current position. */
+  @Test
+  void testReaderIsSet() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1};
+      byte[] value = new byte[] {2};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      vector.setSafe(0, holder);
+      vector.setNull(1);
+      vector.setValueCount(2);
+
+      VariantReaderImpl reader = (VariantReaderImpl) vector.getReader();
+
+      reader.setPosition(0);
+      assertTrue(reader.isSet());
+
+      reader.setPosition(1);
+      assertFalse(reader.isSet());
+    }
+  }
+
+  /** The reader reports the same minor type as its vector. */
+  @Test
+  void testReaderGetMinorType() {
+    try (VariantVector vector = new VariantVector("test", allocator)) {
+      VariantReaderImpl reader = (VariantReaderImpl) vector.getReader();
+      assertEquals(vector.getMinorType(), reader.getMinorType());
+    }
+  }
+
+  /** The reader reports the same field as its vector. */
+  @Test
+  void testReaderGetField() {
+    try (VariantVector vector = new VariantVector("test", allocator)) {
+      VariantReaderImpl reader = (VariantReaderImpl) vector.getReader();
+      assertEquals(vector.getField(), reader.getField());
+      assertEquals("test", reader.getField().getName());
+    }
+  }
+
+  /** The reader can also fill a non-nullable {@link VariantHolder} with the stored bytes. */
+  @Test
+  void testReaderReadWithNonNullableVariantHolder() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2, 3};
+      byte[] value = new byte[] {4, 5, 6};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      vector.setSafe(0, holder);
+      vector.setValueCount(1);
+
+      VariantReaderImpl reader = (VariantReaderImpl) vector.getReader();
+      reader.setPosition(0);
+
+      VariantHolder result = new VariantHolder();
+      reader.read(result);
+
+      // Verify the data was read correctly
+      byte[] actualMetadata = new byte[metadata.length];
+      byte[] actualValue = new byte[value.length];
+      result.metadataBuffer.getBytes(result.metadataStart, actualMetadata);
+      result.valueBuffer.getBytes(result.valueStart, actualValue);
+
+      assertArrayEquals(metadata, actualMetadata);
+      assertArrayEquals(value, actualValue);
+      assertEquals(1, result.isSet);
+    }
+  }
+
+  // ========== Factory Tests ==========
+
+  /** The factory returns a {@link VariantWriterImpl} for a variant vector. */
+  @Test
+  void testVariantWriterFactory() {
+    VariantWriterFactory factory = new VariantWriterFactory();
+
+    try (VariantVector vector = new VariantVector("test", allocator)) {
+      FieldWriter writer = factory.getWriterImpl(vector);
+      assertNotNull(writer);
+      assertTrue(writer instanceof VariantWriterImpl);
+    }
+  }
+
+  /** The factory returns {@code null} for an extension vector that is not a variant vector. */
+  @Test
+  void testVariantWriterFactoryWithNonVariantVector() {
+    VariantWriterFactory factory = new VariantWriterFactory();
+
+    // Use UuidVector as a different extension type
+    try (UuidVector uuidVector = new UuidVector("uuid", allocator)) {
+      FieldWriter writer = factory.getWriterImpl(uuidVector);
+      assertNull(writer);
+    }
+  }
+
+  // ========== Transfer Pair Tests ==========
+
+  /** {@code transfer} moves the value to the target vector and leaves the source empty. */
+  @Test
+  void testTransferPair() {
+    try (VariantVector fromVector = new VariantVector("from", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2, 3};
+      byte[] value = new byte[] {4, 5, 6, 7};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      fromVector.setSafe(0, holder);
+      fromVector.setValueCount(1);
+
+      org.apache.arrow.vector.util.TransferPair transferPair =
+          fromVector.getTransferPair(allocator);
+
+      // try-with-resources: a failing assertion must not leak the target vector, otherwise
+      // allocator.close() in afterEach reports a leak on top of the real failure.
+      try (VariantVector toVector = (VariantVector) transferPair.getTo()) {
+        transferPair.transfer();
+
+        assertEquals(0, fromVector.getValueCount());
+        assertEquals(1, toVector.getValueCount());
+
+        NullableVariantHolder result = new NullableVariantHolder();
+        toVector.get(0, result);
+        assertEquals(1, result.isSet);
+
+        byte[] actualMetadata = new byte[metadata.length];
+        byte[] actualValue = new byte[value.length];
+        result.metadataBuffer.getBytes(result.metadataStart, actualMetadata);
+        result.valueBuffer.getBytes(result.valueStart, actualValue);
+
+        assertArrayEquals(metadata, actualMetadata);
+        assertArrayEquals(value, actualValue);
+      }
+    }
+  }
+
+  /** {@code splitAndTransfer(1, 2)} copies the second and third values into the target vector. */
+  @Test
+  void testSplitAndTransfer() {
+    try (VariantVector fromVector = new VariantVector("from", allocator);
+        ArrowBuf metadataBuf1 = allocator.buffer(10);
+        ArrowBuf valueBuf1 = allocator.buffer(10);
+        ArrowBuf metadataBuf2 = allocator.buffer(10);
+        ArrowBuf valueBuf2 = allocator.buffer(10);
+        ArrowBuf metadataBuf3 = allocator.buffer(10);
+        ArrowBuf valueBuf3 = allocator.buffer(10)) {
+
+      byte[] metadata1 = new byte[] {1};
+      byte[] value1 = new byte[] {2, 3};
+      metadataBuf1.setBytes(0, metadata1);
+      valueBuf1.setBytes(0, value1);
+
+      byte[] metadata2 = new byte[] {4, 5};
+      byte[] value2 = new byte[] {6};
+      metadataBuf2.setBytes(0, metadata2);
+      valueBuf2.setBytes(0, value2);
+
+      byte[] metadata3 = new byte[] {7, 8, 9};
+      byte[] value3 = new byte[] {10, 11, 12};
+      metadataBuf3.setBytes(0, metadata3);
+      valueBuf3.setBytes(0, value3);
+
+      NullableVariantHolder holder1 =
+          createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1);
+      NullableVariantHolder holder2 =
+          createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2);
+      NullableVariantHolder holder3 =
+          createNullableHolder(metadataBuf3, metadata3, valueBuf3, value3);
+
+      fromVector.setSafe(0, holder1);
+      fromVector.setSafe(1, holder2);
+      fromVector.setSafe(2, holder3);
+      fromVector.setValueCount(3);
+
+      org.apache.arrow.vector.util.TransferPair transferPair =
+          fromVector.getTransferPair(allocator);
+
+      // Close the target even when an assertion fails (see testTransferPair).
+      try (VariantVector toVector = (VariantVector) transferPair.getTo()) {
+        // Split and transfer indices 1-2 (middle and last)
+        transferPair.splitAndTransfer(1, 2);
+
+        assertEquals(2, toVector.getValueCount());
+
+        // Verify transferred values
+        NullableVariantHolder result1 = new NullableVariantHolder();
+        toVector.get(0, result1);
+        assertEquals(1, result1.isSet);
+
+        byte[] actualMetadata1 = new byte[metadata2.length];
+        byte[] actualValue1 = new byte[value2.length];
+        result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1);
+        result1.valueBuffer.getBytes(result1.valueStart, actualValue1);
+        assertArrayEquals(metadata2, actualMetadata1);
+        assertArrayEquals(value2, actualValue1);
+
+        NullableVariantHolder result2 = new NullableVariantHolder();
+        toVector.get(1, result2);
+        assertEquals(1, result2.isSet);
+
+        byte[] actualMetadata2 = new byte[metadata3.length];
+        byte[] actualValue2 = new byte[value3.length];
+        result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2);
+        result2.valueBuffer.getBytes(result2.valueStart, actualValue2);
+        assertArrayEquals(metadata3, actualMetadata2);
+        assertArrayEquals(value3, actualValue2);
+      }
+    }
+  }
+
+  /** {@code copyValueSafe} copies a value into the target and keeps it in the source. */
+  @Test
+  void testCopyValueSafe() {
+    try (VariantVector fromVector = new VariantVector("from", allocator);
+        VariantVector toVector = new VariantVector("to", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2};
+      byte[] value = new byte[] {3, 4, 5};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      fromVector.setSafe(0, holder);
+      fromVector.setValueCount(1);
+
+      org.apache.arrow.vector.util.TransferPair transferPair =
+          fromVector.makeTransferPair(toVector);
+
+      transferPair.copyValueSafe(0, 0);
+      toVector.setValueCount(1);
+
+      // Verify the value was copied
+      NullableVariantHolder result = new NullableVariantHolder();
+      toVector.get(0, result);
+      assertEquals(1, result.isSet);
+
+      byte[] actualMetadata = new byte[metadata.length];
+      byte[] actualValue = new byte[value.length];
+      result.metadataBuffer.getBytes(result.metadataStart, actualMetadata);
+      result.valueBuffer.getBytes(result.valueStart, actualValue);
+
+      assertArrayEquals(metadata, actualMetadata);
+      assertArrayEquals(value, actualValue);
+
+      // Original vector should still have the value
+      NullableVariantHolder originalResult = new NullableVariantHolder();
+      fromVector.get(0, originalResult);
+      assertEquals(1, originalResult.isSet);
+    }
+  }
+
+  /** A transfer pair created from a field produces a target vector with that field's name. */
+  @Test
+  void testGetTransferPairWithField() {
+    try (VariantVector fromVector = new VariantVector("from", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1};
+      byte[] value = new byte[] {2};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      fromVector.setSafe(0, holder);
+      fromVector.setValueCount(1);
+
+      org.apache.arrow.vector.util.TransferPair transferPair =
+          fromVector.getTransferPair(fromVector.getField(), allocator);
+
+      // Close the target even when an assertion fails (see testTransferPair).
+      try (VariantVector toVector = (VariantVector) transferPair.getTo()) {
+        transferPair.transfer();
+
+        assertEquals(1, toVector.getValueCount());
+        assertEquals(fromVector.getField().getName(), toVector.getField().getName());
+      }
+    }
+  }
+
+  // ========== Copy Operations Tests ==========
+
+  /** {@code copyFrom} into a pre-allocated vector reproduces the source bytes. */
+  @Test
+  void testCopyFrom() {
+    try (VariantVector fromVector = new VariantVector("from", allocator);
+        VariantVector toVector = new VariantVector("to", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2, 3};
+      byte[] value = new byte[] {4, 5};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      fromVector.setSafe(0, holder);
+      fromVector.setValueCount(1);
+
+      toVector.allocateNew();
+      toVector.copyFrom(0, 0, fromVector);
+      toVector.setValueCount(1);
+
+      NullableVariantHolder result = new NullableVariantHolder();
+      toVector.get(0, result);
+      assertEquals(1, result.isSet);
+
+      byte[] actualMetadata = new byte[metadata.length];
+      byte[] actualValue = new byte[value.length];
+      result.metadataBuffer.getBytes(result.metadataStart, actualMetadata);
+      result.valueBuffer.getBytes(result.valueStart, actualValue);
+
+      assertArrayEquals(metadata, actualMetadata);
+      assertArrayEquals(value, actualValue);
+    }
+  }
+
+  /** {@code copyFromSafe} works on a target that was never explicitly allocated. */
+  @Test
+  void testCopyFromSafe() {
+    try (VariantVector fromVector = new VariantVector("from", allocator);
+        VariantVector toVector = new VariantVector("to", allocator);
+        ArrowBuf metadataBuf1 = allocator.buffer(10);
+        ArrowBuf valueBuf1 = allocator.buffer(10);
+        ArrowBuf metadataBuf2 = allocator.buffer(10);
+        ArrowBuf valueBuf2 = allocator.buffer(10)) {
+
+      byte[] metadata1 = new byte[] {1};
+      byte[] value1 = new byte[] {2, 3};
+      metadataBuf1.setBytes(0, metadata1);
+      valueBuf1.setBytes(0, value1);
+
+      NullableVariantHolder holder1 =
+          createNullableHolder(metadataBuf1, metadata1, valueBuf1, value1);
+
+      byte[] metadata2 = new byte[] {4, 5};
+      byte[] value2 = new byte[] {6};
+      metadataBuf2.setBytes(0, metadata2);
+      valueBuf2.setBytes(0, value2);
+
+      NullableVariantHolder holder2 =
+          createNullableHolder(metadataBuf2, metadata2, valueBuf2, value2);
+
+      fromVector.setSafe(0, holder1);
+      fromVector.setSafe(1, holder2);
+      fromVector.setValueCount(2);
+
+      // Copy without pre-allocating toVector
+      for (int i = 0; i < 2; i++) {
+        toVector.copyFromSafe(i, i, fromVector);
+      }
+      toVector.setValueCount(2);
+
+      // Verify both values
+      NullableVariantHolder result1 = new NullableVariantHolder();
+      toVector.get(0, result1);
+      assertEquals(1, result1.isSet);
+
+      byte[] actualMetadata1 = new byte[metadata1.length];
+      byte[] actualValue1 = new byte[value1.length];
+      result1.metadataBuffer.getBytes(result1.metadataStart, actualMetadata1);
+      result1.valueBuffer.getBytes(result1.valueStart, actualValue1);
+      assertArrayEquals(metadata1, actualMetadata1);
+      assertArrayEquals(value1, actualValue1);
+
+      NullableVariantHolder result2 = new NullableVariantHolder();
+      toVector.get(1, result2);
+      assertEquals(1, result2.isSet);
+
+      byte[] actualMetadata2 = new byte[metadata2.length];
+      byte[] actualValue2 = new byte[value2.length];
+      result2.metadataBuffer.getBytes(result2.metadataStart, actualMetadata2);
+      result2.valueBuffer.getBytes(result2.valueStart, actualValue2);
+      assertArrayEquals(metadata2, actualMetadata2);
+      assertArrayEquals(value2, actualValue2);
+    }
+  }
+
+  /** Copying slot by slot preserves which slots are null. */
+  @Test
+  void testCopyFromWithNulls() {
+    try (VariantVector fromVector = new VariantVector("from", allocator);
+        VariantVector toVector = new VariantVector("to", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1};
+      byte[] value = new byte[] {2};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      fromVector.setSafe(0, holder);
+      fromVector.setNull(1);
+      fromVector.setSafe(2, holder);
+      fromVector.setValueCount(3);
+
+      toVector.allocateNew();
+      for (int i = 0; i < 3; i++) {
+        toVector.copyFromSafe(i, i, fromVector);
+      }
+      toVector.setValueCount(3);
+
+      assertFalse(toVector.isNull(0));
+      assertTrue(toVector.isNull(1));
+      assertFalse(toVector.isNull(2));
+    }
+  }
+
+  // ========== GetObject Tests ==========
+
+  /** {@code getObject} returns a non-null object for a set slot. */
+  @Test
+  void testGetObject() {
+    try (VariantVector vector = new VariantVector("test", allocator);
+        ArrowBuf metadataBuf = allocator.buffer(10);
+        ArrowBuf valueBuf = allocator.buffer(10)) {
+
+      byte[] metadata = new byte[] {1, 2};
+      byte[] value = new byte[] {3, 4, 5};
+      metadataBuf.setBytes(0, metadata);
+      valueBuf.setBytes(0, value);
+
+      NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value);
+
+      vector.setSafe(0, holder);
+      vector.setValueCount(1);
+
+      Object obj = vector.getObject(0);
+      assertNotNull(obj);
+    }
+  }
+
+  /** {@code getObject} returns {@code null} for a null slot. */
+  @Test
+  void testGetObjectNull() {
+    try (VariantVector vector = new VariantVector("test", allocator)) {
+      vector.setNull(0);
+      vector.setValueCount(1);
+
+      Object obj = vector.getObject(0);
+      assertNull(obj);
+    }
+  }
+
+  // ========== Allocate and Capacity Tests
========== + + @Test + void testAllocateNew() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.allocateNew(); + assertTrue(vector.getValueCapacity() > 0); + } + } + + @Test + void testSetInitialCapacity() { + try (VariantVector vector = new VariantVector("test", allocator)) { + vector.setInitialCapacity(100); + vector.allocateNew(); + assertTrue(vector.getValueCapacity() >= 100); + } + } + + @Test + void testClearAndReuse() { + try (VariantVector vector = new VariantVector("test", allocator); + ArrowBuf metadataBuf = allocator.buffer(10); + ArrowBuf valueBuf = allocator.buffer(10)) { + + byte[] metadata = new byte[] {1}; + byte[] value = new byte[] {2}; + metadataBuf.setBytes(0, metadata); + valueBuf.setBytes(0, value); + + NullableVariantHolder holder = createNullableHolder(metadataBuf, metadata, valueBuf, value); + + vector.setSafe(0, holder); + vector.setValueCount(1); + + assertFalse(vector.isNull(0)); + + vector.clear(); + vector.allocateNew(); + + // After clear, vector should be empty + assertEquals(0, vector.getValueCount()); + } + } +}