From 3983108a3519af4c08a7260703248ca8c0132c44 Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Fri, 6 Dec 2024 10:51:03 -0900 Subject: [PATCH 1/9] Fix bug preventing setting of logging level in setup_cli_logger --- gutils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gutils/__init__.py b/gutils/__init__.py index 9cb1ea6..eea8a21 100644 --- a/gutils/__init__.py +++ b/gutils/__init__.py @@ -305,5 +305,5 @@ def setup_cli_logger(level=None): formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') sh.setFormatter(formatter) root_logger = logging.getLogger() - root_logger.setLevel(logging.INFO) + root_logger.setLevel(level) root_logger.handlers = [sh] From bb42570c67b09ff7ca143398b2d8e6fb74634f49 Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Fri, 6 Dec 2024 11:36:36 -0900 Subject: [PATCH 2/9] Make process_folder respect argument to reader_class parameter --- gutils/nc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gutils/nc.py b/gutils/nc.py index dc591e6..c712192 100644 --- a/gutils/nc.py +++ b/gutils/nc.py @@ -727,7 +727,7 @@ def merge_profile_netcdf_files(folder, output): os.remove(new_path) -def process_folder(deployment_path, mode, merger_class, reader_class, subset=True, template='trajectory', profile_id_type=ProfileIdTypes.EPOCH, workers=4, **filters): +def process_folder(deployment_path, mode, merger_class, reader_class=SlocumReader, subset=True, template='trajectory', profile_id_type=ProfileIdTypes.EPOCH, workers=4, **filters): from multiprocessing import Pool @@ -748,7 +748,7 @@ def process_folder(deployment_path, mode, merger_class, reader_class, subset=Tru with Pool(processes=workers) as pool: kwargs = dict( - reader_class=SlocumReader, + reader_class=reader_class, deployments_path=Path(str(deployment_path)).parent, subset=subset, template=template, From e797f72a65fa470b23b7bc495d32ae7d16a5e9c7 Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Fri, 6 Dec 2024 
11:54:01 -0900 Subject: [PATCH 3/9] Make SlocumMerger default merger_class for process_folder --- gutils/nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gutils/nc.py b/gutils/nc.py index c712192..a191e73 100644 --- a/gutils/nc.py +++ b/gutils/nc.py @@ -727,7 +727,7 @@ def merge_profile_netcdf_files(folder, output): os.remove(new_path) -def process_folder(deployment_path, mode, merger_class, reader_class=SlocumReader, subset=True, template='trajectory', profile_id_type=ProfileIdTypes.EPOCH, workers=4, **filters): +def process_folder(deployment_path, mode, merger_class=SlocumMerger, reader_class=SlocumReader, subset=True, template='trajectory', profile_id_type=ProfileIdTypes.EPOCH, workers=4, **filters): from multiprocessing import Pool From 942ce290520dd2f87ffee84626dd93ce0e1d7608 Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Fri, 6 Dec 2024 12:05:02 -0900 Subject: [PATCH 4/9] Fix URL in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 29f41cf..e7e5198 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ $ conda install -c conda-forge gutils ## Setup ```bash -$ git clone [git@git.axiom:axiom/packrat.git](https://github.com/secoora/GUTILS.git) +$ git clone https://github.com/secoora/GUTILS.git ``` Install Anaconda (using python3): http://conda.pydata.org/docs/download.html From 3873769f3b96f9777b969a685b19545abfab67f1 Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Wed, 11 Dec 2024 16:23:31 -0900 Subject: [PATCH 5/9] Fix build failures by passing --use-pep517 flag to pip install --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 459424e..3c04464 100644 --- a/Dockerfile +++ b/Dockerfile @@ -60,6 +60,7 @@ RUN pip install \ --no-deps \ --force-reinstall \ --ignore-installed \ + --use-pep517 \ -r /tmp/pip-requirements.txt ENV PATH /opt/conda/bin:$PATH @@ -78,5 +79,5 @@ ENV GUTILS_VERSION 3.2.0 ENV PROJECT_ROOT 
/code RUN mkdir -p "$PROJECT_ROOT" COPY . $PROJECT_ROOT -RUN cd $PROJECT_ROOT && pip install --no-deps . +RUN cd $PROJECT_ROOT && pip install --no-deps --use-pep517 . WORKDIR $PROJECT_ROOT From 440a0511ca351492b79e8d12769d1da5033a868d Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Wed, 11 Dec 2024 16:27:52 -0900 Subject: [PATCH 6/9] Add entrypoint scripts for bulk processing/checking deployment folders --- gutils/nc.py | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++- setup.cfg | 2 + 2 files changed, 196 insertions(+), 1 deletion(-) diff --git a/gutils/nc.py b/gutils/nc.py index a191e73..d55838c 100644 --- a/gutils/nc.py +++ b/gutils/nc.py @@ -32,7 +32,7 @@ from gutils import get_uv_data, get_profile_data, read_attrs, safe_makedirs, setup_cli_logger from gutils.filters import process_dataset -from gutils.slocum import SlocumReader +from gutils.slocum import SlocumMerger, SlocumReader import logging logging.getLogger("urllib3").setLevel(logging.WARNING) @@ -591,6 +591,7 @@ def main_create(): # CHECKER def check_dataset(args): + L.info('Checking {}'.format(args.file)) check_suite = CheckSuite() check_suite.load_all_available_checkers() @@ -763,3 +764,195 @@ def process_folder(deployment_path, mode, merger_class=SlocumMerger, reader_clas ] print([ res.get() for res in multiple_results ]) + +def process_folder_arg_parser(): + parser = argparse.ArgumentParser( + description='Parses a deployment folder of binary files into a set of ' + 'NetCDF files according to JSON configurations ' + 'for institution, deployment, glider, and datatypes.' + ) + parser.add_argument( + 'deployment_path', + help='Path to folder containing all deployment config and for file output.' 
+ ) + parser.add_argument( + '--mode', + help="Glider mode, either 'rt' (real-time) or 'delayed'; default is 'delayed' since this is a bulk operation.", + default='delayed', + choices=['rt', 'delayed'] + ) + parser.add_argument( + "-r", + "--reader_class", + help="Glider reader to interpret the data", + default='slocum' + ) + parser.add_argument( + "-m", + "--merger_class", + help="Glider merger to convert the data from binary to ASCII", + default='slocum' + ) + parser.add_argument( + '-ts', '--tsint', + help="Interpolation window to consider when assigning profiles", + default=None, + type=int + ) + parser.add_argument( + '-fp', '--filter_points', + help="Filter out profiles that do not have at least this number of points", + default=None, + type=int + ) + parser.add_argument( + '-fd', '--filter_distance', + help="Filter out profiles that do not span at least this vertical distance (meters)", + default=None, + type=float + ) + parser.add_argument( + '-ft', '--filter_time', + help="Filter out profiles that last less than this number of seconds", + default=None, + type=float + ) + parser.add_argument( + '-fz', '--filter_z', + help="Filter out profiles that are not completely below this depth (meters)", + default=None, + type=float + ) + parser.add_argument( + "-za", + "--z_axis_method", + help="1 == Calculate depth from pressure, 2 == Use raw depth values", + default=1, + type=int + ) + parser.add_argument( + '--no-subset', + dest='subset', + action='store_false', + help='Process all variables - not just those available in a datatype mapping JSON file' + ) + parser.add_argument( + "-t", + "--template", + help="The template to use when writing netCDF files. 
Options: None, [filepath], trajectory, ioos_ngdac", + default='trajectory' + ), + parser.add_argument( + "-w", + "--workers", + help="The number of workers to use when processing the files", + type=int, + default=4 + ), + parser.add_argument( + '--log_level', + help='Set the logging level', + default='WARNING', + choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] + ) + parser.set_defaults(subset=True) + + return parser + +def main_process_folder(): + + parser = process_folder_arg_parser() + + args = parser.parse_args() + + filter_args = vars(args) + + # Remove non-filter args into positional arguments + deployment_path = filter_args.pop('deployment_path') + mode = filter_args.pop('mode') + subset = filter_args.pop('subset') + template = filter_args.pop('template') + z_axis_method = filter_args.pop('z_axis_method') + workers = filter_args.pop('workers') + log_level = filter_args.pop('log_level') + + setup_cli_logger(getattr(logging, log_level)) + + # Move reader_class to a class + reader_class = filter_args.pop('reader_class') + if reader_class == 'slocum': + reader_class = SlocumReader + + # Move merger_class to a class + merger_class = filter_args.pop('merger_class') + if merger_class == 'slocum': + merger_class = SlocumMerger + + process_folder( + deployment_path=deployment_path, + mode=mode, + reader_class=reader_class, + merger_class=merger_class, + subset=subset, + template=template, + workers=workers, + z_axis_method=z_axis_method, + **filter_args + ) + +def check_folder_arg_parser(): + parser = argparse.ArgumentParser( + description='Verifies that a folder of glider NetCDF files from a provider ' + 'contain all the required global attributes, dimensions,' + 'scalar variables and dimensioned variables.' + ) + parser.add_argument( + 'deployment_path', + help='Path to folder containing all deployment config and for file output.' 
+ ) + parser.add_argument( + '--mode', + help="Glider mode, either 'rt' (real-time) or 'delayed'; default is 'delayed' since this is a bulk operation.", + default='delayed', + choices=['rt', 'delayed'] + ) + parser.add_argument( + "-w", + "--workers", + help="The number of workers to use when checking the files", + type=int, + default=4 + ), + parser.add_argument( + '--log_level', + help='Set the logging level', + default='WARNING', + choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] + ) + return parser + +class CheckDatasetArgs: + def __init__(self, file): + self.file = file + +def create_check_dataset_args(file): + return CheckDatasetArgs(file) + +def main_check_folder(): + from multiprocessing import Pool + + parser = check_folder_arg_parser() + args = parser.parse_args() + + setup_cli_logger(getattr(logging, args.log_level)) + + ncs = Path(args.deployment_path, args.mode, 'netcdf').glob('*.nc') + + with Pool(processes=args.workers) as pool: + multiple_results = [ + pool.apply_async( + check_dataset, (create_check_dataset_args(str(x)),) + ) for x in ncs + ] + + print([ res.get() for res in multiple_results ]) diff --git a/setup.cfg b/setup.cfg index b53f795..255f35f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,7 +54,9 @@ exclude = [options.entry_points] console_scripts = gutils_create_nc = gutils.nc:main_create + gutils_process_folder = gutils.nc:main_process_folder gutils_check_nc = gutils.nc:main_check + gutils_check_folder = gutils.nc:main_check_folder gutils_binary_to_ascii_watch = gutils.watch.binary:main_to_ascii gutils_ascii_to_netcdf_watch = gutils.watch.ascii:main_to_netcdf gutils_netcdf_to_ftp_watch = gutils.watch.netcdf:main_to_ftp From 9bee062a00e24065cb896549d9ac178e12c4a08b Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Thu, 13 Feb 2025 15:29:38 -0900 Subject: [PATCH 7/9] Pin CI pre-commit job to Python 3.9 --- .github/workflows/push.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/push.yml 
b/.github/workflows/push.yml index cb2afbd..97e812c 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -16,6 +16,8 @@ jobs: steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v2 + with: + python-version: 3.9 - uses: pre-commit/action@v2.0.0 test: From 7715cdee6aed4a10adc99086001807d96af729d7 Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Thu, 13 Feb 2025 15:33:00 -0900 Subject: [PATCH 8/9] Remove lint --- gutils/nc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gutils/nc.py b/gutils/nc.py index d55838c..6647af8 100644 --- a/gutils/nc.py +++ b/gutils/nc.py @@ -765,6 +765,7 @@ def process_folder(deployment_path, mode, merger_class=SlocumMerger, reader_clas print([ res.get() for res in multiple_results ]) + def process_folder_arg_parser(): parser = argparse.ArgumentParser( description='Parses a deployment folder of binary files into a set of ' @@ -859,6 +860,7 @@ def process_folder_arg_parser(): return parser + def main_process_folder(): parser = process_folder_arg_parser() @@ -900,6 +902,7 @@ def main_process_folder(): **filter_args ) + def check_folder_arg_parser(): parser = argparse.ArgumentParser( description='Verifies that a folder of glider NetCDF files from a provider ' @@ -931,13 +934,16 @@ def check_folder_arg_parser(): ) return parser + class CheckDatasetArgs: def __init__(self, file): self.file = file + def create_check_dataset_args(file): return CheckDatasetArgs(file) + def main_check_folder(): from multiprocessing import Pool From 5cfd62dde81f058a12ed42a08b4671d914ca0fbf Mon Sep 17 00:00:00 2001 From: Trevor Golden Date: Thu, 13 Feb 2025 15:51:04 -0900 Subject: [PATCH 9/9] Migrate CI from provision-with-micromamba to setup-micromamba https://github.com/mamba-org/provision-with-micromamba#migration-to-setup-micromamba --- .github/workflows/push.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 97e812c..60f51f8 100644 
--- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -31,9 +31,7 @@ jobs: - uses: actions/checkout@v3 - name: Setup Micromamba - uses: mamba-org/provision-with-micromamba@main - with: - environment-file: false + uses: mamba-org/setup-micromamba@v1 - name: Python ${{ matrix.python-version }} shell: bash -l {0}