diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index cb2afbd..60f51f8 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -16,6 +16,8 @@ jobs:
     steps:
     - uses: actions/checkout@v3
     - uses: actions/setup-python@v2
+      with:
+        python-version: 3.9
    - uses: pre-commit/action@v2.0.0
 
   test:
@@ -29,9 +31,7 @@ jobs:
     - uses: actions/checkout@v3
 
     - name: Setup Micromamba
-      uses: mamba-org/provision-with-micromamba@main
-      with:
-        environment-file: false
+      uses: mamba-org/setup-micromamba@v1
 
     - name: Python ${{ matrix.python-version }}
       shell: bash -l {0}
diff --git a/Dockerfile b/Dockerfile
index 459424e..3c04464 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -60,6 +60,7 @@ RUN pip install \
         --no-deps \
         --force-reinstall \
         --ignore-installed \
+        --use-pep517 \
         -r /tmp/pip-requirements.txt
 
 ENV PATH /opt/conda/bin:$PATH
@@ -78,5 +79,5 @@ ENV GUTILS_VERSION 3.2.0
 ENV PROJECT_ROOT /code
 RUN mkdir -p "$PROJECT_ROOT"
 COPY . $PROJECT_ROOT
-RUN cd $PROJECT_ROOT && pip install --no-deps .
+RUN cd $PROJECT_ROOT && pip install --no-deps --use-pep517 .
 WORKDIR $PROJECT_ROOT
diff --git a/README.md b/README.md
index 29f41cf..e7e5198 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ $ conda install -c conda-forge gutils
 ## Setup
 
 ```bash
-$ git clone [git@git.axiom:axiom/packrat.git](https://github.com/secoora/GUTILS.git)
+$ git clone https://github.com/secoora/GUTILS.git
 ```
 
 Install Anaconda (using python3): http://conda.pydata.org/docs/download.html
diff --git a/gutils/__init__.py b/gutils/__init__.py
index 9cb1ea6..eea8a21 100644
--- a/gutils/__init__.py
+++ b/gutils/__init__.py
@@ -305,5 +305,5 @@ def setup_cli_logger(level=None):
     formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
     sh.setFormatter(formatter)
     root_logger = logging.getLogger()
-    root_logger.setLevel(logging.INFO)
+    root_logger.setLevel(level)
     root_logger.handlers = [sh]
diff --git a/gutils/nc.py b/gutils/nc.py
index dc591e6..6647af8 100644
--- a/gutils/nc.py
+++ b/gutils/nc.py
@@ -32,7 +32,7 @@
 from gutils import get_uv_data, get_profile_data, read_attrs, safe_makedirs, setup_cli_logger
 from gutils.filters import process_dataset
-from gutils.slocum import SlocumReader
+from gutils.slocum import SlocumMerger, SlocumReader
 
 import logging
 logging.getLogger("urllib3").setLevel(logging.WARNING)
@@ -591,6 +591,7 @@ def main_create():
 
 
 # CHECKER
 def check_dataset(args):
+    L.info('Checking {}'.format(args.file))
     check_suite = CheckSuite()
     check_suite.load_all_available_checkers()
@@ -727,7 +728,7 @@ def merge_profile_netcdf_files(folder, output):
             os.remove(new_path)
 
 
-def process_folder(deployment_path, mode, merger_class, reader_class, subset=True, template='trajectory', profile_id_type=ProfileIdTypes.EPOCH, workers=4, **filters):
+def process_folder(deployment_path, mode, merger_class=SlocumMerger, reader_class=SlocumReader, subset=True, template='trajectory', profile_id_type=ProfileIdTypes.EPOCH, workers=4, **filters):
 
     from multiprocessing import Pool
@@ -748,7 +749,7 @@ def process_folder(deployment_path, mode, merger_class, reader_class, subset=Tru
 
     with Pool(processes=workers) as pool:
         kwargs = dict(
-            reader_class=SlocumReader,
+            reader_class=reader_class,
             deployments_path=Path(str(deployment_path)).parent,
             subset=subset,
             template=template,
@@ -763,3 +764,201 @@ def process_folder(deployment_path, mode, merger_class, reader_class, subset=Tru
         ]
 
         print([ res.get() for res in multiple_results ])
+
+
+def process_folder_arg_parser():
+    parser = argparse.ArgumentParser(
+        description='Parses a deployment folder of binary files into a set of '
+                    'NetCDF files according to JSON configurations '
+                    'for institution, deployment, glider, and datatypes.'
+    )
+    parser.add_argument(
+        'deployment_path',
+        help='Path to folder containing all deployment config and for file output.'
+    )
+    parser.add_argument(
+        '--mode',
+        help="Glider mode, either 'rt' (real-time) or 'delayed'; default is 'delayed' since this is a bulk operation.",
+        default='delayed',
+        choices=['rt', 'delayed']
+    )
+    parser.add_argument(
+        "-r",
+        "--reader_class",
+        help="Glider reader to interpret the data",
+        default='slocum'
+    )
+    parser.add_argument(
+        "-m",
+        "--merger_class",
+        help="Glider merger to convert the data from binary to ASCII",
+        default='slocum'
+    )
+    parser.add_argument(
+        '-ts', '--tsint',
+        help="Interpolation window to consider when assigning profiles",
+        default=None,
+        type=int
+    )
+    parser.add_argument(
+        '-fp', '--filter_points',
+        help="Filter out profiles that do not have at least this number of points",
+        default=None,
+        type=int
+    )
+    parser.add_argument(
+        '-fd', '--filter_distance',
+        help="Filter out profiles that do not span at least this vertical distance (meters)",
+        default=None,
+        type=float
+    )
+    parser.add_argument(
+        '-ft', '--filter_time',
+        help="Filter out profiles that last less than this number of seconds",
+        default=None,
+        type=float
+    )
+    parser.add_argument(
+        '-fz', '--filter_z',
+        help="Filter out profiles that are not completely below this depth (meters)",
+        default=None,
+        type=float
+    )
+    parser.add_argument(
+        "-za",
+        "--z_axis_method",
+        help="1 == Calculate depth from pressure, 2 == Use raw depth values",
+        default=1,
+        type=int
+    )
+    parser.add_argument(
+        '--no-subset',
+        dest='subset',
+        action='store_false',
+        help='Process all variables - not just those available in a datatype mapping JSON file'
+    )
+    parser.add_argument(
+        "-t",
+        "--template",
+        help="The template to use when writing netCDF files. Options: None, [filepath], trajectory, ioos_ngdac",
+        default='trajectory'
+    )
+    parser.add_argument(
+        "-w",
+        "--workers",
+        help="The number of workers to use when processing the files",
+        type=int,
+        default=4
+    )
+    parser.add_argument(
+        '--log_level',
+        help='Set the logging level',
+        default='WARNING',
+        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+    )
+    parser.set_defaults(subset=True)
+
+    return parser
+
+
+def main_process_folder():
+
+    parser = process_folder_arg_parser()
+
+    args = parser.parse_args()
+
+    filter_args = vars(args)
+
+    # Pull the non-filter arguments out; whatever remains is passed through as filters
+    deployment_path = filter_args.pop('deployment_path')
+    mode = filter_args.pop('mode')
+    subset = filter_args.pop('subset')
+    template = filter_args.pop('template')
+    z_axis_method = filter_args.pop('z_axis_method')
+    workers = filter_args.pop('workers')
+    log_level = filter_args.pop('log_level')
+
+    setup_cli_logger(getattr(logging, log_level))
+
+    # Map the reader_class name to a class
+    reader_class = filter_args.pop('reader_class')
+    if reader_class == 'slocum':
+        reader_class = SlocumReader
+
+    # Map the merger_class name to a class
+    merger_class = filter_args.pop('merger_class')
+    if merger_class == 'slocum':
+        merger_class = SlocumMerger
+
+    process_folder(
+        deployment_path=deployment_path,
+        mode=mode,
+        reader_class=reader_class,
+        merger_class=merger_class,
+        subset=subset,
+        template=template,
+        workers=workers,
+        z_axis_method=z_axis_method,
+        **filter_args
+    )
+
+
+def check_folder_arg_parser():
+    parser = argparse.ArgumentParser(
+        description='Verifies that a folder of glider NetCDF files from a provider '
+                    'contains all the required global attributes, dimensions, '
+                    'scalar variables and dimensioned variables.'
+    )
+    parser.add_argument(
+        'deployment_path',
+        help='Path to folder containing all deployment config and for file output.'
+    )
+    parser.add_argument(
+        '--mode',
+        help="Glider mode, either 'rt' (real-time) or 'delayed'; default is 'delayed' since this is a bulk operation.",
+        default='delayed',
+        choices=['rt', 'delayed']
+    )
+    parser.add_argument(
+        "-w",
+        "--workers",
+        help="The number of workers to use when checking the files",
+        type=int,
+        default=4
+    )
+    parser.add_argument(
+        '--log_level',
+        help='Set the logging level',
+        default='WARNING',
+        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+    )
+    return parser
+
+
+class CheckDatasetArgs:
+    def __init__(self, file):
+        self.file = file
+
+
+def create_check_dataset_args(file):
+    return CheckDatasetArgs(file)
+
+
+def main_check_folder():
+    from multiprocessing import Pool
+
+    parser = check_folder_arg_parser()
+    args = parser.parse_args()
+
+    setup_cli_logger(getattr(logging, args.log_level))
+
+    ncs = Path(args.deployment_path, args.mode, 'netcdf').glob('*.nc')
+
+    with Pool(processes=args.workers) as pool:
+        multiple_results = [
+            pool.apply_async(
+                check_dataset, (create_check_dataset_args(str(x)),)
+            ) for x in ncs
+        ]
+
+        print([ res.get() for res in multiple_results ])
diff --git a/setup.cfg b/setup.cfg
index b53f795..255f35f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -54,7 +54,9 @@ exclude =
 [options.entry_points]
 console_scripts =
     gutils_create_nc = gutils.nc:main_create
+    gutils_process_folder = gutils.nc:main_process_folder
     gutils_check_nc = gutils.nc:main_check
+    gutils_check_folder = gutils.nc:main_check_folder
     gutils_binary_to_ascii_watch = gutils.watch.binary:main_to_ascii
     gutils_ascii_to_netcdf_watch = gutils.watch.ascii:main_to_netcdf
     gutils_netcdf_to_ftp_watch = gutils.watch.netcdf:main_to_ftp
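
A minimal usage sketch for the new bulk entry points registered in `setup.cfg`, calling `process_folder` directly rather than through the console script. The deployment path below is hypothetical and the flag values are illustrative; only the imported names come from this changeset:

```python
# Roughly equivalent to the new console scripts:
#   $ gutils_process_folder /data/deployments/example-glider --mode delayed --workers 8
#   $ gutils_check_folder /data/deployments/example-glider --mode delayed
import logging

from gutils import setup_cli_logger
from gutils.nc import process_folder
from gutils.slocum import SlocumMerger, SlocumReader

setup_cli_logger(logging.INFO)

# Bulk-process the deployment folder for the given mode; merger_class and
# reader_class now default to the Slocum classes, so they could be omitted.
process_folder(
    deployment_path='/data/deployments/example-glider',  # hypothetical path
    mode='delayed',
    merger_class=SlocumMerger,
    reader_class=SlocumReader,
    template='trajectory',
    workers=8,
)
```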