diff --git a/access/gfz_isdc_dealiasing_ftp.py b/access/gfz_isdc_dealiasing_sync.py
similarity index 62%
rename from access/gfz_isdc_dealiasing_ftp.py
rename to access/gfz_isdc_dealiasing_sync.py
index 73394d0d..f51f9b78 100644
--- a/access/gfz_isdc_dealiasing_ftp.py
+++ b/access/gfz_isdc_dealiasing_sync.py
@@ -1,13 +1,14 @@
#!/usr/bin/env python
u"""
-gfz_isdc_dealiasing_ftp.py
-Written by Tyler Sutterley (05/2023)
+gfz_isdc_dealiasing_sync.py
+Written by Tyler Sutterley (10/2025)
Syncs GRACE Level-1b dealiasing products from the GFZ Information
System and Data Center (ISDC)
+
Optionally outputs as monthly tar files
CALLING SEQUENCE:
- python gfz_isdc_dealiasing_ftp.py --year=2015 --release=RL06 --tar
+ python gfz_isdc_dealiasing_sync.py --year=2015 --release=RL06 --tar
COMMAND LINE OPTIONS:
-D X, --directory X: working data directory
@@ -30,6 +31,7 @@
utilities.py: download and management utilities for syncing files
UPDATE HISTORY:
+ Updated 10/2025: switch to https as ftp server is being retired
Updated 05/2023: use pathlib to define and operate on paths
Updated 03/2023: increase default year range to sync
Updated 12/2022: single implicit import of gravity toolkit
@@ -51,8 +53,9 @@
import sys
import os
import re
+import ssl
import time
-import ftplib
+import shutil
import logging
import pathlib
import tarfile
@@ -62,7 +65,7 @@
# PURPOSE: syncs GRACE Level-1b dealiasing products from the GFZ data server
# and optionally outputs as monthly tar files
-def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
+def gfz_isdc_dealiasing_sync(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
TIMEOUT=None, LOG=False, CLOBBER=False, MODE=None):
# check if directory exists and recursively create if not
base_dir = pathlib.Path(base_dir).expanduser().absolute()
@@ -81,10 +84,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
# standard output (terminal output)
logging.basicConfig(level=logging.INFO)
- # remote HOST for DREL on GFZ data server
- # connect and login to GFZ ftp server
- ftp = ftplib.FTP('isdcftp.gfz-potsdam.de', timeout=TIMEOUT)
- ftp.login()
+ # GFZ ISDC https host
+ HOST = 'https://isdc-data.gfz.de/'
# compile regular expression operator for years to sync
if YEAR is None:
@@ -97,9 +98,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
SUFFIX = dict(RL04='tar.gz', RL05='tar.gz', RL06='tgz')
# find remote yearly directories for DREL
- YRS,_ = gravtk.utilities.ftp_list([ftp.host,'grace',
- 'Level-1B', 'GFZ','AOD',DREL], timeout=TIMEOUT, basename=True,
- pattern=R1, sort=True)
+ YRS,_ = http_list([HOST,'grace','Level-1B', 'GFZ','AOD',DREL],
+ timeout=TIMEOUT, basename=True, pattern=R1, sort=True)
# for each year
for Y in YRS:
# for each month of interest
@@ -114,8 +114,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
# will extract year and month and calendar day from the ascii file
regex_pattern = r'AOD1B_({0})-({1:02d})-(\d+)_X_\d+.asc.gz$'
R2 = re.compile(regex_pattern.format(Y,M), re.VERBOSE)
- remote_files,remote_mtimes = gravtk.utilities.ftp_list(
- [ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y],
+ remote_files,remote_mtimes = http_list(
+ [HOST,'grace','Level-1B','GFZ','AOD',DREL,Y],
timeout=TIMEOUT, basename=True, pattern=R2, sort=True)
file_count = len(remote_files)
# if compressing into monthly tar files
@@ -124,10 +124,10 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
tar = tarfile.open(name=local_tar_file, mode='w:gz')
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# remote version of each input file
- remote = [ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
- logging.info(posixpath.join('ftp://',*remote))
+ remote = [HOST,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
+ logging.info(posixpath.join(*remote))
# retrieve bytes from remote file
- remote_buffer = gravtk.utilities.from_ftp(remote,
+ remote_buffer = gravtk.utilities.from_http(remote,
timeout=TIMEOUT)
# add file to tar
tar_info = tarfile.TarInfo(name=fi)
@@ -142,23 +142,96 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
# copy each gzip file and keep as individual daily files
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# remote and local version of each input file
- remote = [ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
+ remote = [HOST,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
local_file = grace_dir.joinpath(fi)
- ftp_mirror_file(ftp,remote,remote_mtime,local_file,
+ http_pull_file(remote,remote_mtime,local_file,
CLOBBER=CLOBBER, MODE=MODE)
- # close the ftp connection
- ftp.quit()
# close log file and set permissions level to MODE
if LOG:
LOGFILE.chmod(mode=MODE)
+# PURPOSE: list a directory on the GFZ https server
+def http_list(
+ HOST: str | list,
+ timeout: int | None = None,
+ context: ssl.SSLContext = gravtk.utilities._default_ssl_context,
+ pattern: str | re.Pattern = '',
+ sort: bool = False
+ ):
+ """
+ List a directory on the GFZ https Server
+
+ Parameters
+ ----------
+ HOST: str or list
+ remote http host path
+ timeout: int or NoneType, default None
+ timeout in seconds for blocking operations
+ context: obj, default gravity_toolkit.utilities._default_ssl_context
+ SSL context for ``urllib`` opener object
+ pattern: str, default ''
+ regular expression pattern for reducing list
+ sort: bool, default False
+ sort output list
+
+ Returns
+ -------
+ colnames: list
+ column names in a directory
+ collastmod: list
+ last modification times for items in the directory
+ """
+ # verify inputs for remote http host
+ if isinstance(HOST, str):
+ HOST = gravtk.utilities.url_split(HOST)
+ # regular expression pattern for finding files and modification times
+ parser = r'\>(.*?)\<\/a\>\s+(\d{4}-\d{2}-\d{2}\s+\d{2}\:\d{2})'
+ rx = re.compile(parser, re.VERBOSE)
+ # try listing from http
+ try:
+ # Create and submit request.
+ request = gravtk.utilities.urllib2.Request(posixpath.join(*HOST))
+ response = gravtk.utilities.urllib2.urlopen(request,
+ timeout=timeout, context=context)
+ except Exception as exc:
+ raise Exception('List error from {0}'.format(posixpath.join(*HOST))) from exc
+ # read the directory listing
+ contents = response.readlines()
+ # read and parse request for files (column names and modified times)
+ lines = [l for l in contents if rx.search(l.decode('utf-8'))]
+ # column names and last modified times
+ colnames = [None]*len(lines)
+ collastmod = [None]*len(lines)
+ for i, l in enumerate(lines):
+ colnames[i], lastmod = rx.findall(l.decode('utf-8')).pop()
+ # get the Unix timestamp value for a modification time
+ collastmod[i] = gravtk.utilities.get_unix_time(lastmod,
+ format='%Y-%m-%d %H:%M')
+ # reduce using regular expression pattern
+ if pattern:
+ i = [i for i,f in enumerate(colnames) if re.search(pattern, f)]
+ # reduce list of column names and last modified times
+ colnames = [colnames[indice] for indice in i]
+ collastmod = [collastmod[indice] for indice in i]
+ # sort the list
+ if sort:
+ i = [i for i,j in sorted(enumerate(colnames), key=lambda i: i[1])]
+ # sort list of column names and last modified times
+ colnames = [colnames[indice] for indice in i]
+ collastmod = [collastmod[indice] for indice in i]
+ # return the list of column names and last modified times
+ return (colnames, collastmod)
+
# PURPOSE: pull file from a remote host checking if file exists locally
# and if the remote file is newer than the local file
-def ftp_mirror_file(ftp,remote_path,remote_mtime,local_file,
- CLOBBER=False,MODE=0o775):
- # path to remote file
- remote_file = posixpath.join(*remote_path[1:])
+def http_pull_file(remote_path, remote_mtime, local_file,
+ TIMEOUT=None, LIST=False, CLOBBER=False, MODE=0o775):
+ # verify inputs for remote http host
+ if isinstance(remote_path, str):
+ remote_path = gravtk.utilities.url_split(remote_path)
+ # construct remote file path
+ remote_file = posixpath.join(*remote_path)
# if file exists in file system: check if remote file is newer
TEST = False
OVERWRITE = ' (clobber)'
@@ -178,15 +251,24 @@ def ftp_mirror_file(ftp,remote_path,remote_mtime,local_file,
# if file does not exist locally, is to be overwritten, or CLOBBER is set
if TEST or CLOBBER:
# Printing files transferred
- remote_ftp_url = posixpath.join('ftp://',*remote_path)
- logging.info(f'{remote_ftp_url} -->')
- logging.info(f'\t{local_file}{OVERWRITE}\n')
- # copy remote file contents to local file
- with local_file.open(mode='wb') as f:
- ftp.retrbinary(f'RETR {remote_file}', f.write)
- # keep remote modification time of file and local access time
- os.utime(local_file, (local_file.stat().st_atime, remote_mtime))
- local_file.chmod(mode=MODE)
+ logging.info(f'{remote_file} --> ')
+ logging.info(f'\t{str(local_file)}{OVERWRITE}\n')
+ # if executing copy command (not only printing the files)
+ if not LIST:
+ # Create and submit request. There are a wide range of exceptions
+ # that can be thrown here, including HTTPError and URLError.
+ request = gravtk.utilities.urllib2.Request(remote_file)
+ response = gravtk.utilities.urllib2.urlopen(request,
+ timeout=TIMEOUT)
+ # chunked transfer encoding size
+ CHUNK = 16 * 1024
+ # copy contents to local file using chunked transfer encoding
+ # transfer should work properly with ascii and binary data formats
+ with local_file.open(mode='wb') as f:
+ shutil.copyfileobj(response, f, CHUNK)
+ # keep remote modification time of file and local access time
+ os.utime(local_file, (local_file.stat().st_atime, remote_mtime))
+ local_file.chmod(mode=MODE)
# PURPOSE: create argument parser
def arguments():
@@ -243,14 +325,17 @@ def main():
parser = arguments()
args,_ = parser.parse_known_args()
+ # GFZ ISDC https host
+ HOST = 'https://isdc-data.gfz.de/'
# check internet connection before attempting to run program
- HOST = 'isdcftp.gfz-potsdam.de'
- if gravtk.utilities.check_ftp_connection(HOST):
+ if gravtk.utilities.check_connection(HOST):
for DREL in args.release:
- gfz_isdc_dealiasing_ftp(args.directory, DREL=DREL,
+ gfz_isdc_dealiasing_sync(args.directory, DREL=DREL,
YEAR=args.year, MONTHS=args.month, TAR=args.tar,
TIMEOUT=args.timeout, LOG=args.log,
CLOBBER=args.clobber, MODE=args.mode)
+ else:
+ raise RuntimeError('Check internet connection')
# run main program
if __name__ == '__main__':
diff --git a/access/gfz_isdc_grace_ftp.py b/access/gfz_isdc_grace_sync.py
similarity index 68%
rename from access/gfz_isdc_grace_ftp.py
rename to access/gfz_isdc_grace_sync.py
index f8a9187e..b641e5aa 100644
--- a/access/gfz_isdc_grace_ftp.py
+++ b/access/gfz_isdc_grace_sync.py
@@ -1,15 +1,11 @@
#!/usr/bin/env python
u"""
-gfz_isdc_grace_ftp.py
-Written by Tyler Sutterley (09/2023)
+gfz_isdc_grace_sync.py
+Written by Tyler Sutterley (10/2025)
Syncs GRACE/GRACE-FO data from the GFZ Information System and Data Center (ISDC)
-Syncs CSR/GFZ/JPL files for RL06 GAA/GAB/GAC/GAD/GSM
- GAA and GAB are GFZ/JPL only
-Gets the latest technical note (TN) files
-Gets the monthly GRACE/GRACE-FO newsletters
CALLING SEQUENCE:
- python gfz_isdc_grace_ftp.py
+ python gfz_isdc_grace_sync.py
OUTPUTS:
CSR RL06: GAC/GAD/GSM
@@ -27,7 +23,6 @@
-L, --list: print files to be transferred, but do not execute transfer
-l, --log: output log of files downloaded
-C, --clobber: Overwrite existing data in transfer
- --checksum: compare hashes to check if overwriting existing data
-M X, --mode X: Local permissions mode of the directories and files synced
PYTHON DEPENDENCIES:
@@ -40,6 +35,7 @@
utilities.py: download and management utilities for syncing files
UPDATE HISTORY:
+ Updated 10/2025: switch to https as ftp server is being retired
Updated 09/2023: don't restrict version number to a set list
Updated 05/2023: use pathlib to define and operate on paths
Updated 12/2022: single implicit import of gravity toolkit
@@ -68,11 +64,10 @@
import sys
import os
import re
+import ssl
import copy
import time
-import ftplib
import shutil
-import hashlib
import logging
import pathlib
import argparse
@@ -80,14 +75,16 @@
import gravity_toolkit as gravtk
# PURPOSE: sync local GRACE/GRACE-FO files with GFZ ISDC server
-def gfz_isdc_grace_ftp(DIRECTORY, PROC=[], DREL=[], VERSION=[],
+def gfz_isdc_grace_sync(DIRECTORY, PROC=[], DREL=[], VERSION=[],
NEWSLETTERS=False, TIMEOUT=None, LOG=False, LIST=False,
- CLOBBER=False, CHECKSUM=False, MODE=None):
+ CLOBBER=False, MODE=None):
# check if directory exists and recursively create if not
DIRECTORY = pathlib.Path(DIRECTORY).expanduser().absolute()
DIRECTORY.mkdir(mode=MODE, parents=True, exist_ok=True)
+ # GFZ ISDC https host
+ HOST = 'https://isdc-data.gfz.de/'
# mission shortnames
shortname = {'grace':'GRAC', 'grace-fo':'GRFO'}
# datasets for each processing center
@@ -110,10 +107,6 @@ def gfz_isdc_grace_ftp(DIRECTORY, PROC=[], DREL=[], VERSION=[],
# standard output (terminal output)
logging.basicConfig(level=logging.INFO)
- # connect and login to GFZ ISDC ftp server
- ftp = ftplib.FTP('isdcftp.gfz-potsdam.de', timeout=TIMEOUT)
- ftp.login()
-
# Degree 1 (geocenter) coefficients
logging.info('Degree 1 Coefficients:')
local_dir = DIRECTORY.joinpath('geocenter')
@@ -123,51 +116,51 @@ def gfz_isdc_grace_ftp(DIRECTORY, PROC=[], DREL=[], VERSION=[],
# compile regular expression operator for remote files
R1 = re.compile(r'TN-13_GEOC_(CSR|GFZ|JPL)_(.*?).txt$', re.VERBOSE)
# get filenames from remote directory
- remote_files,remote_mtimes = gravtk.utilities.ftp_list(
- [ftp.host,'grace-fo','DOCUMENTS','TECHNICAL_NOTES'],
- timeout=TIMEOUT, basename=True, pattern=R1, sort=True)
+ remote_files,remote_mtimes = http_list(
+ [HOST,'grace-fo','DOCUMENTS','TECHNICAL_NOTES'],
+ timeout=TIMEOUT, pattern=R1, sort=True)
# for each file on the remote server
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# extract filename from regex object
- remote_path = [ftp.host,'grace-fo','DOCUMENTS','TECHNICAL_NOTES',fi]
+ remote_path = [HOST,'grace-fo','DOCUMENTS','TECHNICAL_NOTES',fi]
local_file = local_dir.joinpath(fi)
- ftp_mirror_file(ftp, remote_path, remote_mtime,
+ http_pull_file(remote_path, remote_mtime,
local_file, TIMEOUT=TIMEOUT, LIST=LIST,
- CLOBBER=CLOBBER, CHECKSUM=CHECKSUM, MODE=MODE)
+ CLOBBER=CLOBBER, MODE=MODE)
# SLR C2,0 coefficients
logging.info('C2,0 Coefficients:')
# compile regular expression operator for remote files
R1 = re.compile(r'TN-(05|07|11)_C20_SLR_RL(.*?).txt$', re.VERBOSE)
# get filenames from remote directory
- remote_files,remote_mtimes = gravtk.utilities.ftp_list(
- [ftp.host,'grace','DOCUMENTS','TECHNICAL_NOTES'],
- timeout=TIMEOUT, basename=True, pattern=R1, sort=True)
+ remote_files,remote_mtimes = http_list(
+ [HOST,'grace','DOCUMENTS','TECHNICAL_NOTES'],
+ timeout=TIMEOUT, pattern=R1, sort=True)
# for each file on the remote server
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# extract filename from regex object
- remote_path = [ftp.host,'grace','DOCUMENTS','TECHNICAL_NOTES',fi]
+ remote_path = [HOST,'grace','DOCUMENTS','TECHNICAL_NOTES',fi]
local_file = DIRECTORY.joinpath(re.sub(r'(_RL.*?).txt','.txt',fi))
- ftp_mirror_file(ftp, remote_path, remote_mtime,
+ http_pull_file(remote_path, remote_mtime,
local_file, TIMEOUT=TIMEOUT, LIST=LIST,
- CLOBBER=CLOBBER, CHECKSUM=CHECKSUM, MODE=MODE)
+ CLOBBER=CLOBBER, MODE=MODE)
# SLR C3,0 coefficients
logging.info('C3,0 Coefficients:')
# compile regular expression operator for remote files
R1 = re.compile(r'TN-(14)_C30_C20_SLR_GSFC.txt$', re.VERBOSE)
# get filenames from remote directory
- remote_files,remote_mtimes = gravtk.utilities.ftp_list(
- [ftp.host,'grace-fo','DOCUMENTS','TECHNICAL_NOTES'],
- timeout=TIMEOUT, basename=True, pattern=R1, sort=True)
+ remote_files,remote_mtimes = http_list(
+ [HOST,'grace-fo','DOCUMENTS','TECHNICAL_NOTES'],
+ timeout=TIMEOUT, pattern=R1, sort=True)
# for each file on the remote server
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# extract filename from regex object
- remote_path = [ftp.host,'grace-fo','DOCUMENTS','TECHNICAL_NOTES',fi]
+ remote_path = [HOST,'grace-fo','DOCUMENTS','TECHNICAL_NOTES',fi]
local_file = DIRECTORY.joinpath(re.sub(r'(SLR_GSFC)','GSFC_SLR',fi))
- ftp_mirror_file(ftp, remote_path, remote_mtime,
+ http_pull_file(remote_path, remote_mtime,
local_file, TIMEOUT=TIMEOUT, LIST=LIST,
- CLOBBER=CLOBBER, CHECKSUM=CHECKSUM, MODE=MODE)
+ CLOBBER=CLOBBER, MODE=MODE)
# TN-08 GAE, TN-09 GAF and TN-10 GAG ECMWF atmosphere correction products
logging.info('TN-08 GAE, TN-09 GAF and TN-10 GAG products:')
@@ -178,17 +171,17 @@ def gfz_isdc_grace_ftp(DIRECTORY, PROC=[], DREL=[], VERSION=[],
# compile regular expression operator for remote files
R1 = re.compile(r'({0}|{1}|{2})'.format(*ECMWF_files), re.VERBOSE)
# get filenames from remote directory
- remote_files,remote_mtimes = gravtk.utilities.ftp_list(
- [ftp.host,'grace','DOCUMENTS','TECHNICAL_NOTES'],
- timeout=TIMEOUT, basename=True, pattern=R1, sort=True)
+ remote_files,remote_mtimes = http_list(
+ [HOST,'grace','DOCUMENTS','TECHNICAL_NOTES'],
+ timeout=TIMEOUT, pattern=R1, sort=True)
# for each file on the remote server
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# extract filename from regex object
- remote_path = [ftp.host,'grace','DOCUMENTS','TECHNICAL_NOTES',fi]
+ remote_path = [HOST,'grace','DOCUMENTS','TECHNICAL_NOTES',fi]
local_file = DIRECTORY.joinpath(fi)
- ftp_mirror_file(ftp, remote_path, remote_mtime,
+ http_pull_file(remote_path, remote_mtime,
local_file, TIMEOUT=TIMEOUT, LIST=LIST,
- CLOBBER=CLOBBER, CHECKSUM=CHECKSUM, MODE=MODE)
+ CLOBBER=CLOBBER, MODE=MODE)
# GRACE and GRACE-FO newsletters
if NEWSLETTERS:
@@ -203,25 +196,24 @@ def gfz_isdc_grace_ftp(DIRECTORY, PROC=[], DREL=[], VERSION=[],
NAME = mi.upper().replace('-','_')
R1 = re.compile(rf'{NAME}_SDS_NL_(\d+).pdf', re.VERBOSE)
# find years for GRACE/GRACE-FO newsletters
- years,_ = gravtk.utilities.ftp_list(
- [ftp.host,mi,'DOCUMENTS','NEWSLETTER'],
- timeout=TIMEOUT, basename=True, pattern=r'\d+',
+ years,_ = http_list([HOST,mi,'DOCUMENTS','NEWSLETTER'],
+ timeout=TIMEOUT, pattern=r'\d+',
sort=True)
# for each year of GRACE/GRACE-FO newsletters
for Y in years:
# find GRACE/GRACE-FO newsletters
- remote_files,remote_mtimes = gravtk.utilities.ftp_list(
- [ftp.host,mi,'DOCUMENTS','NEWSLETTER',Y],
- timeout=TIMEOUT, basename=True, pattern=R1,
+ remote_files,remote_mtimes = http_list(
+ [HOST,mi,'DOCUMENTS','NEWSLETTER',Y],
+ timeout=TIMEOUT, pattern=R1,
sort=True)
# for each file on the remote server
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# extract filename from regex object
- remote_path = [ftp.host,mi,'DOCUMENTS','NEWSLETTER',Y,fi]
+ remote_path = [HOST,mi,'DOCUMENTS','NEWSLETTER',Y,fi]
local_file = local_dir.joinpath(fi)
- ftp_mirror_file(ftp, remote_path, remote_mtime,
+ http_pull_file(remote_path, remote_mtime,
local_file, TIMEOUT=TIMEOUT, LIST=LIST,
- CLOBBER=CLOBBER, CHECKSUM=CHECKSUM, MODE=MODE)
+ CLOBBER=CLOBBER, MODE=MODE)
# GRACE/GRACE-FO level-2 spherical harmonic products
logging.info('GRACE/GRACE-FO L2 Global Spherical Harmonics:')
@@ -249,16 +241,16 @@ def gfz_isdc_grace_ftp(DIRECTORY, PROC=[], DREL=[], VERSION=[],
# compile the regular expression operator to find files
R1 = re.compile(rf'({ds}-(.*?)(gz|txt|dif))')
# get filenames from remote directory
- remote_files,remote_mtimes = gravtk.utilities.ftp_list(
- [ftp.host,mi,'Level-2',pr,drel_str], timeout=TIMEOUT,
- basename=True, pattern=R1, sort=True)
+ remote_files,remote_mtimes = http_list(
+ [HOST,mi,'Level-2',pr,drel_str], timeout=TIMEOUT,
+ pattern=R1, sort=True)
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# extract filename from regex object
- remote_path = [ftp.host,mi,'Level-2',pr,drel_str,fi]
+ remote_path = [HOST,mi,'Level-2',pr,drel_str,fi]
local_file = local_dir.joinpath(fi)
- ftp_mirror_file(ftp, remote_path, remote_mtime,
+ http_pull_file(remote_path, remote_mtime,
local_file, TIMEOUT=TIMEOUT, LIST=LIST,
- CLOBBER=CLOBBER, CHECKSUM=CHECKSUM, MODE=MODE)
+ CLOBBER=CLOBBER, MODE=MODE)
# regular expression operator for data product
rx = gravtk.utilities.compile_regex_pattern(
pr, rl, ds, mission=shortname[mi])
@@ -278,35 +270,97 @@ def gfz_isdc_grace_ftp(DIRECTORY, PROC=[], DREL=[], VERSION=[],
# change permissions of index file
index_file.chmod(mode=MODE)
- # close the ftp connection
- ftp.quit()
# close log file and set permissions level to MODE
if LOG:
LOGFILE.chmod(mode=MODE)
+# PURPOSE: list a directory on the GFZ https server
+def http_list(
+ HOST: str | list,
+ timeout: int | None = None,
+ context: ssl.SSLContext = gravtk.utilities._default_ssl_context,
+ pattern: str | re.Pattern = '',
+ sort: bool = False
+ ):
+ """
+ List a directory on the GFZ https Server
+
+ Parameters
+ ----------
+ HOST: str or list
+ remote http host path
+ timeout: int or NoneType, default None
+ timeout in seconds for blocking operations
+ context: obj, default gravity_toolkit.utilities._default_ssl_context
+ SSL context for ``urllib`` opener object
+ pattern: str, default ''
+ regular expression pattern for reducing list
+ sort: bool, default False
+ sort output list
+
+ Returns
+ -------
+ colnames: list
+ column names in a directory
+ collastmod: list
+ last modification times for items in the directory
+ """
+ # verify inputs for remote http host
+ if isinstance(HOST, str):
+ HOST = gravtk.utilities.url_split(HOST)
+ # regular expression pattern for finding files and modification times
+ parser = r'\>(.*?)\<\/a\>\s+(\d{4}-\d{2}-\d{2}\s+\d{2}\:\d{2})'
+ rx = re.compile(parser, re.VERBOSE)
+ # try listing from http
+ try:
+ # Create and submit request.
+ request = gravtk.utilities.urllib2.Request(posixpath.join(*HOST))
+ response = gravtk.utilities.urllib2.urlopen(request,
+ timeout=timeout, context=context)
+ except Exception as exc:
+ raise Exception('List error from {0}'.format(posixpath.join(*HOST))) from exc
+ # read the directory listing
+ contents = response.readlines()
+ # read and parse request for files (column names and modified times)
+ lines = [l for l in contents if rx.search(l.decode('utf-8'))]
+ # column names and last modified times
+ colnames = [None]*len(lines)
+ collastmod = [None]*len(lines)
+ for i, l in enumerate(lines):
+ colnames[i], lastmod = rx.findall(l.decode('utf-8')).pop()
+ # get the Unix timestamp value for a modification time
+ collastmod[i] = gravtk.utilities.get_unix_time(lastmod,
+ format='%Y-%m-%d %H:%M')
+ # reduce using regular expression pattern
+ if pattern:
+ i = [i for i,f in enumerate(colnames) if re.search(pattern, f)]
+ # reduce list of column names and last modified times
+ colnames = [colnames[indice] for indice in i]
+ collastmod = [collastmod[indice] for indice in i]
+ # sort the list
+ if sort:
+ i = [i for i,j in sorted(enumerate(colnames), key=lambda i: i[1])]
+ # sort list of column names and last modified times
+ colnames = [colnames[indice] for indice in i]
+ collastmod = [collastmod[indice] for indice in i]
+ # return the list of column names and last modified times
+ return (colnames, collastmod)
+
# PURPOSE: pull file from a remote host checking if file exists locally
# and if the remote file is newer than the local file
-def ftp_mirror_file(ftp,remote_path,remote_mtime,local_file,
- TIMEOUT=None,LIST=False,CLOBBER=False,CHECKSUM=False,MODE=0o775):
+def http_pull_file(remote_path, remote_mtime, local_file,
+ TIMEOUT=None, LIST=False, CLOBBER=False, MODE=0o775):
+ # verify inputs for remote http host
+ if isinstance(remote_path, str):
+ remote_path = gravtk.utilities.url_split(remote_path)
+ # construct remote file path
+ remote_file = posixpath.join(*remote_path)
# if file exists in file system: check if remote file is newer
TEST = False
OVERWRITE = ' (clobber)'
# check if local version of file exists
local_file = pathlib.Path(local_file).expanduser().absolute()
- if CHECKSUM and local_file.exists():
- # generate checksum hash for local file
- # open the local_file in binary read mode
- local_hash = gravtk.utilities.get_hash(local_file)
- # copy remote file contents to bytesIO object
- remote_buffer = gravtk.utilities.from_ftp(remote_path,
- timeout=TIMEOUT)
- # generate checksum hash for remote file
- remote_hash = hashlib.md5(remote_buffer.getvalue()).hexdigest()
- # compare checksums
- if (local_hash != remote_hash):
- TEST = True
- OVERWRITE = f' (checksums: {local_hash} {remote_hash})'
- elif local_file.exists():
+ if local_file.exists():
# check last modification time of local file
local_mtime = local_file.stat().st_mtime
# if remote file is newer: overwrite the local file
@@ -320,23 +374,21 @@ def ftp_mirror_file(ftp,remote_path,remote_mtime,local_file,
# if file does not exist locally, is to be overwritten, or CLOBBER is set
if TEST or CLOBBER:
# Printing files transferred
- remote_ftp_url = posixpath.join('ftp://',*remote_path)
- logging.info(f'{remote_ftp_url} -->')
+ logging.info(f'{remote_file} --> ')
logging.info(f'\t{str(local_file)}{OVERWRITE}\n')
# if executing copy command (not only printing the files)
if not LIST:
- # copy file from ftp server or from bytesIO object
- if CHECKSUM and local_file.exists():
- # store bytes to file using chunked transfer encoding
- remote_buffer.seek(0)
- with local_file.open(mode='wb') as f:
- shutil.copyfileobj(remote_buffer, f, 16 * 1024)
- else:
- # path to remote file
- remote_file = posixpath.join(*remote_path[1:])
- # copy remote file contents to local file
- with local_file.open(mode='wb') as f:
- ftp.retrbinary(f'RETR {remote_file}', f.write)
+ # Create and submit request. There are a wide range of exceptions
+ # that can be thrown here, including HTTPError and URLError.
+ request = gravtk.utilities.urllib2.Request(remote_file)
+ response = gravtk.utilities.urllib2.urlopen(request,
+ timeout=TIMEOUT)
+ # chunked transfer encoding size
+ CHUNK = 16 * 1024
+ # copy contents to local file using chunked transfer encoding
+ # transfer should work properly with ascii and binary data formats
+ with local_file.open(mode='wb') as f:
+ shutil.copyfileobj(response, f, CHUNK)
# keep remote modification time of file and local access time
os.utime(local_file, (local_file.stat().st_atime, remote_mtime))
local_file.chmod(mode=MODE)
@@ -385,9 +437,6 @@ def arguments():
parser.add_argument('--list','-L',
default=False, action='store_true',
help='Only print files that could be transferred')
- parser.add_argument('--checksum',
- default=False, action='store_true',
- help='Compare hashes to check for overwriting existing data')
parser.add_argument('--clobber','-C',
default=False, action='store_true',
help='Overwrite existing data in transfer')
@@ -404,14 +453,15 @@ def main():
parser = arguments()
args,_ = parser.parse_known_args()
+ # GFZ ISDC https host
+ HOST = 'https://isdc-data.gfz.de/'
# check internet connection before attempting to run program
- HOST = 'isdcftp.gfz-potsdam.de'
- if gravtk.utilities.check_ftp_connection(HOST):
- gfz_isdc_grace_ftp(args.directory, PROC=args.center,
+ if gravtk.utilities.check_connection(HOST):
+ gfz_isdc_grace_sync(args.directory, PROC=args.center,
DREL=args.release, VERSION=args.version,
NEWSLETTERS=args.newsletters, TIMEOUT=args.timeout,
LIST=args.list, LOG=args.log, CLOBBER=args.clobber,
- CHECKSUM=args.checksum, MODE=args.mode)
+ MODE=args.mode)
else:
raise RuntimeError('Check internet connection')
diff --git a/access/podaac_cumulus.py b/access/podaac_cumulus.py
index 71a85905..dcae0784 100644
--- a/access/podaac_cumulus.py
+++ b/access/podaac_cumulus.py
@@ -4,6 +4,7 @@
Written by Tyler Sutterley (11/2024)
Syncs GRACE/GRACE-FO data from NASA JPL PO.DAAC Cumulus AWS S3 bucket
+
S3 Cumulus syncs are only available in AWS instances in us-west-2
Register with NASA Earthdata Login system:
diff --git a/doc/source/api_reference/access/gfz_isdc_dealiasing_ftp.rst b/doc/source/api_reference/access/gfz_isdc_dealiasing_sync.rst
similarity index 65%
rename from doc/source/api_reference/access/gfz_isdc_dealiasing_ftp.rst
rename to doc/source/api_reference/access/gfz_isdc_dealiasing_sync.rst
index ca9af1e0..da1867d6 100644
--- a/doc/source/api_reference/access/gfz_isdc_dealiasing_ftp.rst
+++ b/doc/source/api_reference/access/gfz_isdc_dealiasing_sync.rst
@@ -1,20 +1,20 @@
-==========================
-gfz_isdc_dealiasing_ftp.py
-==========================
+===========================
+gfz_isdc_dealiasing_sync.py
+===========================
- Syncs GRACE Level-1b dealiasing products from the `GFZ Information System and Data Center (ISDC) `_
- Optionally outputs as monthly tar files
`Source code`__
-.. __: https://github.com/tsutterley/gravity-toolkit/blob/main/access/gfz_isdc_dealiasing_ftp.py
+.. __: https://github.com/tsutterley/gravity-toolkit/blob/main/access/gfz_isdc_dealiasing_sync.py
Calling Sequence
################
.. argparse::
- :filename: gfz_isdc_dealiasing_ftp.py
+ :filename: gfz_isdc_dealiasing_sync.py
:func: arguments
- :prog: gfz_isdc_dealiasing_ftp.py
+ :prog: gfz_isdc_dealiasing_sync.py
:nodescription:
:nodefault:
diff --git a/doc/source/api_reference/access/gfz_isdc_grace_ftp.rst b/doc/source/api_reference/access/gfz_isdc_grace_sync.rst
similarity index 75%
rename from doc/source/api_reference/access/gfz_isdc_grace_ftp.rst
rename to doc/source/api_reference/access/gfz_isdc_grace_sync.rst
index 033e5cc5..303391fa 100644
--- a/doc/source/api_reference/access/gfz_isdc_grace_ftp.rst
+++ b/doc/source/api_reference/access/gfz_isdc_grace_sync.rst
@@ -1,6 +1,6 @@
-=====================
-gfz_isdc_grace_ftp.py
-=====================
+======================
+gfz_isdc_grace_sync.py
+======================
- Syncs GRACE/GRACE-FO and auxiliary data from the `GFZ Information System and Data Center (ISDC) `_
- Syncs CSR/GFZ/JPL Level-2 spherical harmonic files
@@ -10,14 +10,14 @@ gfz_isdc_grace_ftp.py
`Source code`__
-.. __: https://github.com/tsutterley/gravity-toolkit/blob/main/access/gfz_isdc_grace_ftp.py
+.. __: https://github.com/tsutterley/gravity-toolkit/blob/main/access/gfz_isdc_grace_sync.py
Calling Sequence
################
.. argparse::
- :filename: gfz_isdc_grace_ftp.py
+ :filename: gfz_isdc_grace_sync.py
:func: arguments
- :prog: gfz_isdc_grace_ftp.py
+ :prog: gfz_isdc_grace_sync.py
:nodescription:
:nodefault:
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 8a32f1fa..de8eb843 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -159,8 +159,8 @@ Contribute
api_reference/access/cnes_grace_sync.rst
api_reference/access/esa_costg_swarm_sync.rst
api_reference/access/gfz_icgem_costg_ftp.rst
- api_reference/access/gfz_isdc_dealiasing_ftp.rst
- api_reference/access/gfz_isdc_grace_ftp.rst
+ api_reference/access/gfz_isdc_dealiasing_sync.rst
+ api_reference/access/gfz_isdc_grace_sync.rst
api_reference/access/itsg_graz_grace_sync.rst
api_reference/access/podaac_cumulus.rst
diff --git a/pixi.lock b/pixi.lock
index 3c318e56..a289b9e1 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -5031,7 +5031,7 @@ packages:
- pypi: ./
name: gravity-toolkit
version: 1.2.4
- sha256: 78349d849cc9f0f33250239d7f0eb3bfe0e4c387f07127875a7c8b1409e3f645
+ sha256: 3f8afea7f56123b97f283b73aba8217cd7af40580e34057759f2e877d5ff6c88
requires_dist:
- boto3
- future
diff --git a/setup.py b/setup.py
index 855306c3..40ede658 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
import os
-from setuptools import setup, find_packages
+from setuptools import setup
# list of all scripts to be included with package
scripts = []
diff --git a/test/test_download_and_read.py b/test/test_download_and_read.py
index 700b7084..3980841a 100644
--- a/test/test_download_and_read.py
+++ b/test/test_download_and_read.py
@@ -28,6 +28,19 @@ def test_podaac_cumulus_download_and_read(username,password):
assert all((Ylms[key] == val) for key,val in test.items())
assert (Ylms['clm'][2,0] == -0.484169355584e-03)
+# PURPOSE: Download a GRACE file from GFZ and check that read program runs
+def test_gfz_http_download_and_read():
+ HOST=['https://isdc-data.gfz.de','grace','Level-2','CSR','RL06',
+ 'GSM-2_2002095-2002120_GRAC_UTCSR_BA01_0600.gz']
+ # download and read as virtual file object
+ FILE = gravtk.utilities.from_http(HOST,verbose=True)
+ Ylms = gravtk.read_GRACE_harmonics(FILE, 60)
+ keys = ['time', 'start', 'end', 'clm', 'slm', 'eclm', 'eslm', 'header']
+ test = dict(start=2452369.5, end=2452394.5)
+ assert all((key in Ylms.keys()) for key in keys)
+ assert all((Ylms[key] == val) for key,val in test.items())
+ assert (Ylms['clm'][2,0] == -0.484169355584e-03)
+
# PURPOSE: Download a GRACE file from GFZ and check that read program runs
def test_gfz_ftp_download_and_read():
HOST=['isdcftp.gfz-potsdam.de','grace','Level-2','CSR','RL06',