Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 122 additions & 37 deletions access/gfz_isdc_dealiasing_ftp.py → access/gfz_isdc_dealiasing_sync.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#!/usr/bin/env python
u"""
gfz_isdc_dealiasing_ftp.py
Written by Tyler Sutterley (05/2023)
gfz_isdc_dealiasing_sync.py
Written by Tyler Sutterley (10/2025)
Syncs GRACE Level-1b dealiasing products from the GFZ Information
System and Data Center (ISDC)

Optionally outputs as monthly tar files

CALLING SEQUENCE:
python gfz_isdc_dealiasing_ftp.py --year=2015 --release=RL06 --tar
python gfz_isdc_dealiasing_sync.py --year=2015 --release=RL06 --tar

COMMAND LINE OPTIONS:
-D X, --directory X: working data directory
Expand All @@ -30,6 +31,7 @@
utilities.py: download and management utilities for syncing files

UPDATE HISTORY:
Updated 10/2025: switch to https as ftp server is being retired
Updated 05/2023: use pathlib to define and operate on paths
Updated 03/2023: increase default year range to sync
Updated 12/2022: single implicit import of gravity toolkit
Expand All @@ -51,8 +53,9 @@
import sys
import os
import re
import ssl
import time
import ftplib
import shutil
import logging
import pathlib
import tarfile
Expand All @@ -62,7 +65,7 @@

# PURPOSE: syncs GRACE Level-1b dealiasing products from the GFZ data server
# and optionally outputs as monthly tar files
def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
def gfz_isdc_dealiasing_sync(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
TIMEOUT=None, LOG=False, CLOBBER=False, MODE=None):
# check if directory exists and recursively create if not
base_dir = pathlib.Path(base_dir).expanduser().absolute()
Expand All @@ -81,10 +84,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
# standard output (terminal output)
logging.basicConfig(level=logging.INFO)

# remote HOST for DREL on GFZ data server
# connect and login to GFZ ftp server
ftp = ftplib.FTP('isdcftp.gfz-potsdam.de', timeout=TIMEOUT)
ftp.login()
# GFZ ISDC https host
HOST = 'https://isdc-data.gfz.de/'

# compile regular expression operator for years to sync
if YEAR is None:
Expand All @@ -97,9 +98,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
SUFFIX = dict(RL04='tar.gz', RL05='tar.gz', RL06='tgz')

# find remote yearly directories for DREL
YRS,_ = gravtk.utilities.ftp_list([ftp.host,'grace',
'Level-1B', 'GFZ','AOD',DREL], timeout=TIMEOUT, basename=True,
pattern=R1, sort=True)
YRS,_ = http_list([HOST,'grace','Level-1B', 'GFZ','AOD',DREL],
timeout=TIMEOUT, basename=True, pattern=R1, sort=True)
# for each year
for Y in YRS:
# for each month of interest
Expand All @@ -114,8 +114,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
# will extract year and month and calendar day from the ascii file
regex_pattern = r'AOD1B_({0})-({1:02d})-(\d+)_X_\d+.asc.gz$'
R2 = re.compile(regex_pattern.format(Y,M), re.VERBOSE)
remote_files,remote_mtimes = gravtk.utilities.ftp_list(
[ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y],
remote_files,remote_mtimes = http_list(
[HOST,'grace','Level-1B','GFZ','AOD',DREL,Y],
timeout=TIMEOUT, basename=True, pattern=R2, sort=True)
file_count = len(remote_files)
# if compressing into monthly tar files
Expand All @@ -124,10 +124,10 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
tar = tarfile.open(name=local_tar_file, mode='w:gz')
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# remote version of each input file
remote = [ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
logging.info(posixpath.join('ftp://',*remote))
remote = [HOST,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
logging.info(posixpath.join(*remote))
# retrieve bytes from remote file
remote_buffer = gravtk.utilities.from_ftp(remote,
remote_buffer = gravtk.utilities.from_sync(remote,
timeout=TIMEOUT)
# add file to tar
tar_info = tarfile.TarInfo(name=fi)
Expand All @@ -142,23 +142,96 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
# copy each gzip file and keep as individual daily files
for fi,remote_mtime in zip(remote_files,remote_mtimes):
# remote and local version of each input file
remote = [ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
remote = [HOST,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
local_file = grace_dir.joinpath(fi)
ftp_mirror_file(ftp,remote,remote_mtime,local_file,
http_pull_file(remote,remote_mtime,local_file,
CLOBBER=CLOBBER, MODE=MODE)

# close the ftp connection
ftp.quit()
# close log file and set permissions level to MODE
if LOG:
LOGFILE.chmod(mode=MODE)

# PURPOSE: list a directory on the GFZ https server
def http_list(
    HOST: str | list,
    timeout: int | None = None,
    context: ssl.SSLContext = gravtk.utilities._default_ssl_context,
    pattern: str | re.Pattern = '',
    sort: bool = False
):
    """
    List a directory on the GFZ https Server

    Parameters
    ----------
    HOST: str or list
        remote http host path
    timeout: int or NoneType, default None
        timeout in seconds for blocking operations
    context: obj, default gravity_toolkit.utilities._default_ssl_context
        SSL context for ``urllib`` opener object
    pattern: str or re.Pattern, default ''
        regular expression pattern for reducing list
    sort: bool, default False
        sort output list

    Returns
    -------
    colnames: list
        column names in a directory
    collastmod: list
        last modification times for items in the directory

    Raises
    ------
    Exception
        if the remote directory listing cannot be retrieved
        (original urllib error is chained as the cause)
    """
    # verify inputs for remote http host
    if isinstance(HOST, str):
        HOST = gravtk.utilities.url_split(HOST)
    # regular expression pattern for finding files and modification times
    # matches anchor text and the "YYYY-MM-DD HH:MM" column of the listing
    parser = r'\<a\shref=.*?\>(.*?)\<\/a\>\s+(\d{4}-\d{2}-\d{2}\s+\d{2}\:\d{2})'
    rx = re.compile(parser, re.VERBOSE)
    # try listing the remote directory over https
    try:
        # create and submit request
        request = gravtk.utilities.urllib2.Request(posixpath.join(*HOST))
        response = gravtk.utilities.urllib2.urlopen(request,
            timeout=timeout, context=context)
    except Exception as exc:
        # chain the original exception so the failure cause is preserved
        url = posixpath.join(*HOST)
        raise Exception(f'List error from {url}') from exc
    # read and decode the directory listing, then release the connection
    contents = [l.decode('utf-8') for l in response.readlines()]
    response.close()
    # reduce to lines containing files (column names and modified times)
    lines = [l for l in contents if rx.search(l)]
    # column names and last modified times
    colnames = [None]*len(lines)
    collastmod = [None]*len(lines)
    for i, line in enumerate(lines):
        colnames[i], lastmod = rx.findall(line).pop()
        # get the Unix timestamp value for a modification time
        collastmod[i] = gravtk.utilities.get_unix_time(lastmod,
            format='%Y-%m-%d %H:%M')
    # reduce using regular expression pattern
    if pattern:
        indices = [i for i, f in enumerate(colnames) if re.search(pattern, f)]
        # reduce list of column names and last modified times
        colnames = [colnames[i] for i in indices]
        collastmod = [collastmod[i] for i in indices]
    # sort the lists in order of the column names
    if sort:
        order = [i for i, _ in sorted(enumerate(colnames), key=lambda x: x[1])]
        # sort list of column names and last modified times
        colnames = [colnames[i] for i in order]
        collastmod = [collastmod[i] for i in order]
    # return the list of column names and last modified times
    return (colnames, collastmod)

# PURPOSE: pull file from a remote host checking if file exists locally
# and if the remote file is newer than the local file
def ftp_mirror_file(ftp,remote_path,remote_mtime,local_file,
CLOBBER=False,MODE=0o775):
# path to remote file
remote_file = posixpath.join(*remote_path[1:])
def http_pull_file(remote_path, remote_mtime, local_file,
TIMEOUT=0, LIST=False, CLOBBER=False, MODE=0o775):
# verify inputs for remote http host
if isinstance(remote_path, str):
remote_path = gravtk.utilities.url_split(remote_path)
# construct remote file path
remote_file = posixpath.join(*remote_path)
# if file exists in file system: check if remote file is newer
TEST = False
OVERWRITE = ' (clobber)'
Expand All @@ -178,15 +251,24 @@ def ftp_mirror_file(ftp,remote_path,remote_mtime,local_file,
# if file does not exist locally, is to be overwritten, or CLOBBER is set
if TEST or CLOBBER:
# Printing files transferred
remote_ftp_url = posixpath.join('ftp://',*remote_path)
logging.info(f'{remote_ftp_url} -->')
logging.info(f'\t{local_file}{OVERWRITE}\n')
# copy remote file contents to local file
with local_file.open(mode='wb') as f:
ftp.retrbinary(f'RETR {remote_file}', f.write)
# keep remote modification time of file and local access time
os.utime(local_file, (local_file.stat().st_atime, remote_mtime))
local_file.chmod(mode=MODE)
logging.info(f'{remote_file} --> ')
logging.info(f'\t{str(local_file)}{OVERWRITE}\n')
# if executing copy command (not only printing the files)
if not LIST:
# Create and submit request. There are a wide range of exceptions
# that can be thrown here, including HTTPError and URLError.
request = gravtk.utilities.urllib2.Request(remote_file)
response = gravtk.utilities.urllib2.urlopen(request,
timeout=TIMEOUT)
# chunked transfer encoding size
CHUNK = 16 * 1024
# copy contents to local file using chunked transfer encoding
# transfer should work properly with ascii and binary data formats
with local_file.open(mode='wb') as f:
shutil.copyfileobj(response, f, CHUNK)
# keep remote modification time of file and local access time
os.utime(local_file, (local_file.stat().st_atime, remote_mtime))
local_file.chmod(mode=MODE)

# PURPOSE: create argument parser
def arguments():
Expand Down Expand Up @@ -243,14 +325,17 @@ def main():
parser = arguments()
args,_ = parser.parse_known_args()

# GFZ ISDC https host
HOST = 'https://isdc-data.gfz.de/'
# check internet connection before attempting to run program
HOST = 'isdcftp.gfz-potsdam.de'
if gravtk.utilities.check_ftp_connection(HOST):
if gravtk.utilities.check_connection(HOST):
for DREL in args.release:
gfz_isdc_dealiasing_ftp(args.directory, DREL=DREL,
gfz_isdc_dealiasing_sync(args.directory, DREL=DREL,
YEAR=args.year, MONTHS=args.month, TAR=args.tar,
TIMEOUT=args.timeout, LOG=args.log,
CLOBBER=args.clobber, MODE=args.mode)
else:
raise RuntimeError('Check internet connection')

# run main program
if __name__ == '__main__':
Expand Down
Loading