diff --git a/.github/scripts/setup_azurite.sh b/.github/scripts/setup_azurite.sh index 1f0bcbf..cb3aec4 100644 --- a/.github/scripts/setup_azurite.sh +++ b/.github/scripts/setup_azurite.sh @@ -60,7 +60,7 @@ tar xzvf $GITHUB_WORKSPACE/test/inputs/sanity.test.tgz -C oldstyle_dir az storage blob upload-batch -d test/oldstyle-dir -s oldstyle_dir --connection-string $AZURE_CONNECTION_STRING export OLDSTYLE_DIR=az://test/oldstyle-dir -cd $GITHUB_WORKSPACE/examples +cd $GITHUB_WORKSPACE/test/scripts tar xzvf examples_ws.tgz az storage blob upload-batch -d test/ws -s ws --connection-string $AZURE_CONNECTION_STRING export WORKSPACE=az://test/ws diff --git a/.github/workflows/basic.yml b/.github/workflows/basic.yml index 57a600b..a87a2c1 100644 --- a/.github/workflows/basic.yml +++ b/.github/workflows/basic.yml @@ -96,14 +96,14 @@ jobs: run: | PYTHONPATH=. pytest PYTHONPATH=. python test/test.py - PYTHONPATH=. examples/test.sh + PYTHONPATH=. test/scripts/test.sh - name: Cloud Test - Azurite run: | source .github/scripts/setup_azurite.sh echo "Testing on Azurite..." echo "WORKSPACE=$WORKSPACE OLDSTYLE_DIR=$OLDSTYLE_DIR" - PYTHONPATH=. ./examples/test.sh + PYTHONPATH=. 
test/scripts/test.sh ls -l /tmp/tiledb_bookkeeping echo "Testing on Azurite DONE" diff --git a/Makefile b/Makefile index 172b359..3e04ff8 100644 --- a/Makefile +++ b/Makefile @@ -55,14 +55,14 @@ clean-test: ## remove test and coverage artifacts rm -fr .pytest_cache format: ## format files with black and isort - black --line-length 120 setup.py src test examples/genomicsdb_* - isort --profile black --line-length 120 setup.py src test examples/genomicsdb_* + black --line-length 120 setup.py src test genomicsdb/scripts + isort --profile black --line-length 120 setup.py src test genomicsdb/scripts lint: ## check style with flake8 and vulnerabilities with bandit bandit -r setup.py src - flake8 --extend-ignore='E203, N803, N806, E402' --max-line-length=120 setup.py src test examples/genomicsdb_* - black --check --line-length 120 setup.py src test examples/genomicsdb_* - isort --profile black --line-length 120 -c setup.py src test examples/genomicsdb_* + flake8 --extend-ignore='E203, N803, N806, E402' --max-line-length=120 setup.py src test genomicsdb/scripts + black --check --line-length 120 setup.py src test genomicsdb/scripts + isort --profile black --line-length 120 -c setup.py src test genomicsdb/scripts cython-lint --max-line-length 120 src/*.pyx test: FORCE ## run tests quickly with the default Python diff --git a/README.md b/README.md index 0b62def..7356969 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![pypi](https://img.shields.io/pypi/v/genomicsdb.svg)](https://pypi.org/project/genomicsdb/) # GenomicsDB-Python -Experimental Python 3 Bindings to the native [GenomicsDB](https://github.com/GenomicsDB/GenomicsDB) library. Only queries are supported for now. For importing vcf files into GenomicsDB, use the command line tools - `vcf2genomicsdb` or `gatk GenomicsDBImport`. +Python 3 Bindings to the native [GenomicsDB](https://github.com/GenomicsDB/GenomicsDB) library. Only queries are supported for now. 
For importing vcf files into GenomicsDB, use the command line tools - `vcf2genomicsdb` or `gatk GenomicsDBImport`. ## Installation : Only Linux and MacOS are currently supported Install `genomicsdb` binary wheels from PyPi with pip: @@ -19,5 +19,8 @@ cd genomicsdb- python setup.py install ``` +## GenomicsDB console scripts +See [GenomicsDB query tool](https://github.com/GenomicsDB/GenomicsDB-Python/blob/master/genomicsdb/scripts/README.md). The available scripts are `genomicsdb_query` and `genomicsdb_cache`; the supported output formats are CSV, JSON and Parquet files. + ## Development See [instructions](https://github.com/GenomicsDB/GenomicsDB-Python/blob/master/INSTALL.md) for local builds and running tests. diff --git a/examples/README.md b/genomicsdb/scripts/README.md similarity index 100% rename from examples/README.md rename to genomicsdb/scripts/README.md diff --git a/genomicsdb/scripts/__init__.py b/genomicsdb/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/genomicsdb_cache b/genomicsdb/scripts/genomicsdb_cache.py old mode 100755 new mode 100644 similarity index 98% rename from examples/genomicsdb_cache rename to genomicsdb/scripts/genomicsdb_cache.py index 2f57f86..03f4cd4 --- a/examples/genomicsdb_cache +++ b/genomicsdb/scripts/genomicsdb_cache.py @@ -5,7 +5,7 @@ # # The MIT License # -# Copyright (c) 2024 dātma, inc™ +# Copyright (c) 2024-2025 dātma, inc™ # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,9 +29,8 @@ import argparse import json -import genomicsdb_common - import genomicsdb +from genomicsdb.scripts import genomicsdb_common def is_cloud_path(path): diff --git a/examples/genomicsdb_common.py b/genomicsdb/scripts/genomicsdb_common.py similarity index 100% rename from examples/genomicsdb_common.py rename to genomicsdb/scripts/genomicsdb_common.py diff --git a/examples/genomicsdb_query 
b/genomicsdb/scripts/genomicsdb_query.py old mode 100755 new mode 100644 similarity index 99% rename from examples/genomicsdb_query rename to genomicsdb/scripts/genomicsdb_query.py index 05d3445..ef59c46 --- a/examples/genomicsdb_query +++ b/genomicsdb/scripts/genomicsdb_query.py @@ -5,7 +5,7 @@ # # The MIT License # -# Copyright (c) 2024 dātma, inc™ +# Copyright (c) 2024-2025 dātma, inc™ # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -35,7 +35,6 @@ import sys from typing import List, NamedTuple -import genomicsdb_common import pyarrow as pa import pyarrow.parquet as pq @@ -43,6 +42,7 @@ from genomicsdb import json_output_mode from genomicsdb.protobuf import genomicsdb_coordinates_pb2 as query_coords from genomicsdb.protobuf import genomicsdb_export_config_pb2 as query_pb +from genomicsdb.scripts import genomicsdb_common logging.basicConfig( format="%(asctime)s.%(msecs)03d %(levelname)-5s GenomicsDB Python - pid=%(process)d tid=%(thread)d %(message)s", diff --git a/setup.py b/setup.py index 2bb55ef..f6b5d6c 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ # # The MIT License (MIT) # -# Copyright (c) 2023 dātma, inc™ +# Copyright (c) 2023-2025 dātma, inc™ # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -146,7 +146,7 @@ def run_cythonize(src): setup( name="genomicsdb", - description="Experimental Python Bindings for querying GenomicsDB", + description="Python Bindings for querying GenomicsDB", long_description=long_description, long_description_content_type="text/markdown", author="GenomicsDB.org", @@ -159,12 +159,18 @@ def run_cythonize(src): setup_requires=["cython>=0.27"], install_requires=install_requirements, python_requires=">=3.9", - packages=find_packages(exclude=["package", "test"]), + 
packages=find_packages(include=["genomicsdb", "genomicsdb.*"], exclude=["package", "test"]), keywords=["genomics", "genomicsdb", "variant", "vcf", "variant calls"], include_package_data=True, version=with_version, + entry_points={ + "console_scripts": [ + "genomicsdb_query=genomicsdb.scripts.genomicsdb_query:main", + "genomicsdb_cache=genomicsdb.scripts.genomicsdb_cache:main", + ], + }, classifiers=[ - "Development Status :: 3 - Alpha", + "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", diff --git a/examples/examples_ws.tgz b/test/scripts/examples_ws.tgz similarity index 100% rename from examples/examples_ws.tgz rename to test/scripts/examples_ws.tgz diff --git a/test/scripts/genomicsdb_cache b/test/scripts/genomicsdb_cache new file mode 100755 index 0000000..b52d910 --- /dev/null +++ b/test/scripts/genomicsdb_cache @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +# +# genomicsdb_cache wrapper +# +# The MIT License +# +# Copyright (c) 2025 dātma, inc™ +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +import sys + +from genomicsdb.scripts.genomicsdb_cache import main + +if __name__ == '__main__': + sys.exit(main()) diff --git a/test/scripts/genomicsdb_query b/test/scripts/genomicsdb_query new file mode 100755 index 0000000..4ccfde0 --- /dev/null +++ b/test/scripts/genomicsdb_query @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +# +# genomicsdb_query wrapper +# +# The MIT License +# +# Copyright (c) 2025 dātma, inc™ +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# + +import sys + +from genomicsdb.scripts.genomicsdb_query import main + +if __name__ == '__main__': + sys.exit(main()) diff --git a/examples/test.sh b/test/scripts/test.sh similarity index 99% rename from examples/test.sh rename to test/scripts/test.sh index 278de15..905da13 100755 --- a/examples/test.sh +++ b/test/scripts/test.sh @@ -205,7 +205,7 @@ rm -f loader.json callset.json vidmap.json if [[ -z $OLDSTYLE_DIR ]]; then OLDSTYLE_DIR=$TEMP_DIR/old_style mkdir -p $OLDSTYLE_DIR - tar xzf $(dirname $0)/../test/inputs/sanity.test.tgz -C $OLDSTYLE_DIR + tar xzf $(dirname $0)/../inputs/sanity.test.tgz -C $OLDSTYLE_DIR fi WORKSPACE=$OLDSTYLE_DIR/ws