Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# SCM syntax highlighting & preventing 3-way merges
pixi.lock merge=binary linguist-language=YAML linguist-generated=true

magic.lock merge=binary linguist-language=YAML linguist-generated=true
14 changes: 6 additions & 8 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,16 @@ jobs:
- name: Checkout repo
uses: actions/checkout@v2

- name: Install mojo
- name: Install magic
run: |
curl -s https://get.modular.com | sh -
modular auth 42069
modular install nightly/mojo
echo "/home/runner/.modular/pkg/packages.modular.com_nightly_mojo/bin:$PATH" >> $GITHUB_PATH
curl -ssL https://magic.modular.com/6b3752cd-debc-45dd-b249-5d4941e1c18c | bash
echo "/home/runner/.modular/bin:$PATH" >> $GITHUB_PATH
/home/runner/.modular/bin/magic project platform add linux-64

- name: checks
run: |
pip install -q pre-commit
pre-commit run -a
/home/runner/.modular/bin/magic run pre-commit run -a

- name: tests
run: |
mojo test -I .
/home/runner/.modular/bin/magic run test
12 changes: 6 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
dist/
.venv/
install_id
**/.DS_Store
.scratch/
.vscode/

# pixi environments
.pixi
*.egg-info
# magic environments
.magic
37 changes: 9 additions & 28 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,18 @@ This repo is very much a work in progress. The goal is to provide a way to use A

## Dev Setup

If you have all the prerequisites, you should be able to just run
```bash
make setup
```
Install `magic` by following the [official installation instructions](https://www.modular.com/docs/magic/install).

Prerequisites
- [Mojo](https://www.modular.com/max/mojo)
- Python 3.11 (recommended via pyenv, but not required)
- [`uv`](https://github.com/astral-sh/uv) for python package management. Needed for `make setup` to work but optional. You can also use `pip install -r requirements.txt` instead.
Once `magic` is installed, you should be able to run the test suite with:

If you would like to manually create your python virtual env, use this command
```
python3 -m venv .venv
```bash
magic run test
```

The makefile contains some helpful commands:
- `make setup` - Install python dependencies & setup .venv
- `make test` - Run tests
- `make fmt` - Run formatter
- `make build` - Build the package
- `make clean` - Clean up build artifacts

However, for `make` commands to work
`MODULAR_HOME` and `PATH` must be configured in `~/.zprofile` or `~/.bash_profile` in addition to `~/.zshrc` or `~/.bashrc`.
The magic project file defines the following tasks:
- `magic run test` - Run tests
- `magic run dist` - Build the distribution package
- `magic run build` - Build the package
- `magic run clean` - Clean up build artifacts

```bash
export MODULAR_HOME="$HOME/.modular"

# Pick one of the following, don't use both
# Option A: Nightly Mojo
export PATH="$HOME/.modular/pkg/packages.modular.com_nightly_mojo/bin:$PATH"
# Option B: Stable Mojo
export PATH="$HOME/.modular/pkg/packages.modular.com_mojo/bin:$PATH"
```
6 changes: 3 additions & 3 deletions arrow/array/bool_array.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ struct ArrowBooleanArray:
var _buffer: Bitmap
var mem_used: Int

fn __init__(inout self, values: List[Bool]):
fn __init__(mut self, values: List[Bool]):
self.length = len(values)
self.null_count = 0
self._validity = Bitmap(List(True) * len(values))
self._buffer = Bitmap(values)
self.mem_used = self._validity.mem_used + self._buffer.mem_used

fn __init__(inout self, length: Int):
fn __init__(mut self, length: Int):
self.length = length
self.null_count = 0
self._validity = Bitmap(List(True) * length)
self._buffer = Bitmap(length)
self.mem_used = self._validity.mem_used + self._buffer.mem_used

fn __init__(inout self, values: List[Optional[Bool]]):
fn __init__(mut self, values: List[Optional[Bool]]):
self.length = len(values)
self.null_count = 0
var validity_list = List[Bool](capacity=len(values))
Expand Down
20 changes: 12 additions & 8 deletions arrow/buffer/binary.mojo
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
from arrow.util import ALIGNMENT, get_num_bytes_with_padding
from memory import memset_zero, UnsafePointer


@value
struct BinaryBuffer:
alias _ptr_type = UnsafePointer[UInt8]
alias _ptr_type = UnsafePointer[UInt8, alignment=ALIGNMENT]
var _buffer: Self._ptr_type
var length: Int
var mem_used: Int

fn __init__(inout self, length_unpadded: Int):
fn __init__(mut self, length_unpadded: Int):
self.length = length_unpadded
self.mem_used = get_num_bytes_with_padding(length_unpadded)
self._buffer = Self._ptr_type.alloc(self.mem_used, alignment=ALIGNMENT)
self._buffer = Self._ptr_type.alloc(self.mem_used)
memset_zero(self._buffer, self.mem_used)

fn __init__(inout self, values: List[UInt8]):
fn __init__(mut self, values: List[UInt8]):
self = Self(len(values))
self._unsafe_set_sequence(0, values)

Expand Down Expand Up @@ -46,13 +47,16 @@ struct BinaryBuffer:
self._unsafe_set_sequence(start, values)

fn _unsafe_get_sequence(self, start: Int, length: Int) -> List[UInt8]:
    """Copy `length` bytes out of the buffer, beginning at `start`, into a new List.

    No bounds checking is performed here; every byte is read through
    `_unsafe_getitem`.

    Args:
        start: Index of the first byte to copy.
        length: Number of bytes to copy.

    Returns:
        A newly built List[UInt8] holding the copied bytes in order.
    """
    var copied = List[UInt8](capacity=length)
    var offset = 0
    while offset < length:
        copied.append(self._unsafe_getitem(start + offset))
        offset += 1
    return copied

fn _unsafe_get_sequence(
self, start: Int, length: Int, inout bytes: List[UInt8]
self, start: Int, length: Int, mut bytes: List[UInt8]
):
for i in range(length):
bytes[i] = self._unsafe_getitem(start + i)
Expand All @@ -67,15 +71,15 @@ struct BinaryBuffer:

# Lifecycle methods

fn __moveinit__(inout self, owned existing: BinaryBuffer):
fn __moveinit__(mut self, owned existing: BinaryBuffer):
self._buffer = existing._buffer
self.length = existing.length
self.mem_used = existing.mem_used

fn __copyinit__(inout self, existing: BinaryBuffer):
fn __copyinit__(mut self, existing: BinaryBuffer):
self.length = existing.length
self.mem_used = existing.mem_used
self._buffer = Self._ptr_type.alloc(self.mem_used, alignment=ALIGNMENT)
self._buffer = Self._ptr_type.alloc(self.mem_used)
for i in range(self.mem_used):
self._buffer[i] = existing._buffer[i]

Expand Down
19 changes: 8 additions & 11 deletions arrow/buffer/bitmap.mojo
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from memory import memset_zero
from arrow.util import PADDING, ALIGNMENT, get_num_bytes_with_padding
from memory import UnsafePointer


struct Bitmap(StringableRaising):
Expand All @@ -23,12 +24,12 @@ struct Bitmap(StringableRaising):
```
"""

alias _ptr_type = UnsafePointer[UInt8]
alias _ptr_type = UnsafePointer[UInt8, alignment=ALIGNMENT]
var _buffer: Self._ptr_type
var length: Int
var mem_used: Int

fn __init__(inout self, length_unpadded: Int):
fn __init__(mut self, length_unpadded: Int):
"""Creates a new Bitmap that supports at least `length_unpadded` elements.

Args:
Expand All @@ -39,14 +40,12 @@ struct Bitmap(StringableRaising):
var num_bytes = (length_unpadded + 7) // 8
var num_bytes_with_padding = get_num_bytes_with_padding(num_bytes)

self._buffer = Self._ptr_type.alloc[alignment=ALIGNMENT](
num_bytes_with_padding
)
self._buffer = Self._ptr_type.alloc(num_bytes_with_padding)
memset_zero(self._buffer, num_bytes_with_padding)
self.length = length_unpadded
self.mem_used = num_bytes_with_padding

fn __init__(inout self, bools: List[Bool]):
fn __init__(mut self, bools: List[Bool]):
self = Self(len(bools))

for i in range(len(bools)):
Expand Down Expand Up @@ -86,15 +85,13 @@ struct Bitmap(StringableRaising):
fn __del__(owned self):
    """Free the memory referenced by `_buffer` when the Bitmap is destroyed."""
    self._buffer.free()

fn __moveinit__(inout self, owned existing: Bitmap):
fn __moveinit__(mut self, owned existing: Bitmap):
self._buffer = existing._buffer
self.length = existing.length
self.mem_used = existing.mem_used

fn __copyinit__(inout self, existing: Bitmap):
self._buffer = Self._ptr_type.alloc(
existing.mem_used, alignment=ALIGNMENT
)
fn __copyinit__(mut self, existing: Bitmap):
self._buffer = Self._ptr_type.alloc(existing.mem_used)
for i in range(existing.mem_used):
self._buffer[i] = existing._buffer[i]
self.length = existing.length
Expand Down
16 changes: 9 additions & 7 deletions arrow/buffer/dtype.mojo
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
from arrow.util import ALIGNMENT, get_num_bytes_with_padding
from memory import UnsafePointer, memset_zero
from sys.info import sizeof


struct DTypeBuffer[element_type: DType]:
alias _scalar_type = Scalar[element_type]
alias _ptr_type = UnsafePointer[Self._scalar_type]
alias _ptr_type = UnsafePointer[Self._scalar_type, alignment=ALIGNMENT]
alias element_byte_width = sizeof[Scalar[element_type]]()
var _buffer: Self._ptr_type
var length: Int
var mem_used: Int

fn __init__(inout self, length: Int = 0):
fn __init__(mut self, length: Int = 0):
self.length = length
var num_bytes = self.length * Self.element_byte_width
self.mem_used = get_num_bytes_with_padding(num_bytes)

var alloc_count = self.mem_used // Self.element_byte_width
self._buffer = Self._ptr_type.alloc(alloc_count, alignment=ALIGNMENT)
self._buffer = Self._ptr_type.alloc(alloc_count)
memset_zero(self._buffer, alloc_count)

fn __init__(inout self, values: List[Self._scalar_type]):
fn __init__(mut self, values: List[Self._scalar_type]):
self = Self(len(values))
for i in range(len(values)):
self._unsafe_setitem(i, values[i])
Expand All @@ -44,15 +46,15 @@ struct DTypeBuffer[element_type: DType]:
fn __len__(self) -> Int:
    """Return the value of the `length` field.

    Returns:
        The stored `length` of this buffer.
    """
    return self.length

fn __moveinit__(inout self, owned existing: Self):
fn __moveinit__(mut self, owned existing: Self):
self._buffer = existing._buffer
self.length = existing.length
self.mem_used = existing.mem_used

fn __copyinit__(inout self, existing: Self):
fn __copyinit__(mut self, existing: Self):
self.length = existing.length
self.mem_used = existing.mem_used
self._buffer = Self._ptr_type.alloc(self.mem_used, alignment=ALIGNMENT)
self._buffer = Self._ptr_type.alloc(self.mem_used)
for i in range(self.mem_used):
self._buffer[i] = existing._buffer[i]

Expand Down
6 changes: 3 additions & 3 deletions arrow/physical_layout/arrow.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ from arrow.buffer.dtype import DTypeBuffer

# var mem_use: Int

# fn __init__(inout self, values: List[Scalar[T]]) raises:
# fn __init__(mut self, values: List[Scalar[T]]) raises:
# self._value_buffer = DTypeBuffer[Scalar[T]](len(values))

# var validity_list = List[Bool](len(values))
Expand Down Expand Up @@ -49,12 +49,12 @@ struct ArrowIntVector:
var value_buffer: OffsetBuffer64
var mem_used: Int

fn __init__(inout self, values: List[Int64]):
fn __init__(mut self, values: List[Int64]):
self.length = len(values)
self.value_buffer = OffsetBuffer64(values)

var validity_list = List[Bool](capacity=len(values))
for i in range(values.size):
for i in range(len(values)):
validity_list.append(True)
var val = values[i]
self.value_buffer._unsafe_setitem(i, val)
Expand Down
8 changes: 3 additions & 5 deletions arrow/physical_layout/fixed_size_list.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ struct FixedSizedList[element_type: DType]:

var mem_used: Int

fn __init__(
inout self, values: List[List[Scalar[Self.element_type]]]
) raises:
fn __init__(mut self, values: List[List[Scalar[Self.element_type]]]) raises:
self.length = len(values)
self.list_size = len(values[0])
self.value_buffer = DTypeBuffer[element_type](
Expand All @@ -32,7 +30,7 @@ struct FixedSizedList[element_type: DType]:
# TODO: support nulls
if len(values[i]) != self.list_size:
raise Error(
"FixedSizedList: list size mismatch on index: " + str(i)
"FixedSizedList: list size mismatch on index: " + String(i)
)
validity_list.append(True)
for j in range(self.list_size):
Expand All @@ -48,7 +46,7 @@ struct FixedSizedList[element_type: DType]:
if index < 0 or index >= self.length:
raise Error("index out of range for FixedSizedList")
var ret = List[Scalar[Self.element_type]](capacity=self.list_size)
var offset = int(self.list_size * index)
var offset = Int(self.list_size * index)
for i in range(self.list_size):
ret.append(self.value_buffer[offset + i])
return ret
16 changes: 11 additions & 5 deletions arrow/physical_layout/varbinary.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,22 @@ struct ArrowStringVector:
var value_buffer: BinaryBuffer
var mem_used: Int

fn __init__(inout self, values: List[String]):
fn __init__(mut self, values: List[String]):
var validity_list = List[Bool](capacity=len(values))
var offset_list = List[Int32](capacity=len(values) + 1)

# Calculate the size of the buffer and allocate it
var buffer_size = 0
for i in range(len(values)):
buffer_size += values[i]._buffer.size
buffer_size += len(values[i]._buffer)
self.value_buffer = BinaryBuffer(buffer_size)

offset_list.append(0)
var offset_cursor = 0
for i in range(len(values)):
validity_list.append(True)
var bytes = values[i].as_bytes()
self.value_buffer._unsafe_set_sequence(offset_cursor, bytes)
self.value_buffer._unsafe_set_sequence(offset_cursor, List(bytes))
offset_cursor += len(bytes)
offset_list.append(offset_cursor)

Expand All @@ -43,12 +43,18 @@ struct ArrowStringVector:
var length = self.offsets[index + 1] - start

var bytes = self.value_buffer._unsafe_get_sequence(
int(start), int(length)
Int(start), Int(length)
)

bytes.extend(List[UInt8](0))
var ret = String(bytes)
var ret = String(buffer=bytes)
return ret

fn __setitem__(self, index: Int, value: String) raises:
    """Overwrite the string stored at `index` in place.

    The value bytes live back to back in `value_buffer`, delimited by the
    entries of `offsets`. Because the offsets are not rewritten here, the
    replacement must occupy exactly the same number of bytes as the value it
    replaces; otherwise it would spill into the next element or leave stale
    trailing bytes behind.

    Args:
        index: Position of the element to overwrite.
        value: Replacement string; its byte length must equal the byte
            length of the element currently stored at `index`.

    Raises:
        Error: If `index` is out of range, or if `value` does not have the
            same byte length as the existing element.
    """
    if index < 0 or index >= self.length:
        raise Error("index out of range for ArrowStringVector")
    var bytes = value.as_bytes()
    var start = Int(self.offsets[index])
    # Width of the slot reserved for this element by the offsets buffer.
    var slot_size = Int(self.offsets[index + 1]) - start
    if len(bytes) != slot_size:
        raise Error(
            "ArrowStringVector: replacement value must have the same byte"
            " length as the existing value"
        )
    self.value_buffer.set_sequence(start, List(bytes))

fn __len__(self) -> Int:
    """Return the value of the `length` field.

    Returns:
        The stored `length` of this vector.
    """
    return self.length
Loading