Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 90 additions & 76 deletions scripts/catgen
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ from pathlib import Path

import fnmeta
import pystac
from pystac import Asset, Catalog, CatalogType, Item, ItemCollection, Link, RelType
from pystac import (Asset, Catalog, CatalogType, Item, ItemCollection, Link,
RelType)

LOG = logging.getLogger("catgen")

Expand All @@ -30,45 +31,45 @@ def get_times(fpath: Path) -> "tuple[datetime, datetime] | None":


def generate_collection(
collection_id: str,
pattern: str,
inputs: list[tuple[str, Path]],
basedir: Path,
) -> ItemCollection:
dirpath, pattern = os.path.split(pattern)
items: list[Item] = []

for fpath in Path(dirpath).glob(pattern):
LOG.debug("adding datafile %s", fpath)
times = get_times(fpath)
if not times:
LOG.warning("failed to determine file times for %s; skipping", fpath.name)
continue
start, end = times

try:
item = Item(
id=fpath.name.rsplit(".", 1)[0],
geometry=None,
bbox=None,
properties={},
datetime=start,
start_datetime=start,
end_datetime=end,
# The href will always automatically be updated to be absolute, which will
# likely break consumers of this collection, so don't set it. Let consumers
# assume the asset paths are relative to basedir.
# href=str(basedir),
assets={
fpath.name: Asset(
href=str(fpath.relative_to(basedir)), roles=["data"]
),
},
**({"collection": collection_id} if collection_id else {}), # type: ignore
)
items.append(item)
except pystac.STACError:
LOG.exception("failed to create or add item for %s", fpath.name)
continue
for collection_id, pattern in inputs:
dirpath, pattern = os.path.split(pattern)
for fpath in Path(dirpath).glob(pattern):
LOG.debug("adding datafile %s", fpath)
times = get_times(fpath)
if not times:
LOG.warning("failed to determine file times for %s; skipping", fpath.name)
continue
start, end = times

try:
item = Item(
id=fpath.name.rsplit(".", 1)[0],
geometry=None,
bbox=None,
properties={},
datetime=start,
start_datetime=start,
end_datetime=end,
# The href will always automatically be updated to be absolute, which will
# likely break consumers of this collection, so don't set it. Let consumers
# assume the asset paths are relative to basedir.
# href=str(basedir),
assets={
fpath.name: Asset(
href=str(fpath.relative_to(basedir)), roles=["data"]
),
},
collection=collection_id
)
items.append(item)
except pystac.STACError:
LOG.exception("failed to create or add item for %s", fpath.name)
continue

if len(items) == 0:
raise ValueError("(catgen) Failed to catalog any files")
Expand All @@ -79,46 +80,48 @@ def generate_collection(


def generate_catalog(
collection_id: str,
pattern: str,
inputs: list[tuple[str, Path]],
basedir: Path,
) -> Catalog:
dirpath, pattern = os.path.split(pattern)
catalog = Catalog(collection_id, description="STAC Catalog")

catalog = Catalog("catgen", description="STAC Catalog generated by catgen")
catalog.add_link(Link(RelType.SELF, str(basedir / "catalog.json")))

for fpath in Path(dirpath).glob(pattern):
LOG.debug("adding datafile %s", fpath)
times = get_times(fpath)
if not times:
LOG.warning("failed to determine file times for %s; skipping", fpath.name)
continue
start, end = times
try:
item = Item(
id=fpath.name.rsplit(".", 1)[0],
geometry=None,
bbox=None,
properties={},
datetime=start,
start_datetime=start,
end_datetime=end,
assets={
fpath.name: Asset(href=str(fpath), roles=["data"]),
},
**({"collection": collection_id} if collection_id else {}), # type: ignore
)
meta_path = fpath.with_suffix(".json")
LOG.debug("adding metadata %s", meta_path)
item.add_asset(
meta_path.name, Asset(href=str(meta_path), roles=["metadata"])
)
json.dump(item.to_dict(), open(meta_path, "w"))

catalog.add_link(Link(RelType.ITEM, str(meta_path), "application/json"))
except pystac.STACError:
LOG.exception("failed to create or add item for %s", fpath.name)
continue
for collection_id, pattern in inputs:
dirpath, pattern = os.path.split(pattern)

for fpath in Path(dirpath).glob(pattern):
LOG.debug("adding datafile %s", fpath)
times = get_times(fpath)
if not times:
LOG.warning("failed to determine file times for %s; skipping", fpath.name)
continue
start, end = times
try:
item = Item(
id=fpath.name.rsplit(".", 1)[0],
geometry=None,
bbox=None,
properties={},
datetime=start,
start_datetime=start,
end_datetime=end,
assets={
fpath.name: Asset(href=str(fpath), roles=["data"]),
},
collection=collection_id,
)
meta_path = fpath.with_suffix(".json")
LOG.debug("adding metadata %s", meta_path)
item.add_asset(
meta_path.name, Asset(href=str(meta_path), roles=["metadata"])
)
json.dump(item.to_dict(), open(meta_path, "w"))

catalog.add_link(Link(RelType.ITEM, str(meta_path), "application/json"))
except pystac.STACError:
LOG.exception("failed to create or add item for %s", fpath.name)
continue

if len(list(catalog.get_all_items())) == 0:
raise ValueError("(catgen) Failed to catalog any files")
Expand All @@ -140,18 +143,29 @@ if __name__ == "__main__":
default="catalog",
help="Write either a Catalog or FeatureCollection",
)
parser.add_argument("collection_id")
def inputtype(v: str) -> tuple[str, Path]:
r = [s.strip() for s in v.split(",")]
return r[0], Path(r[1]).absolute()

parser.add_argument(
"file_pattern", help="Glob style file pattern to include in catalog"
"input",
nargs="+",
metavar="SPEC",
type=inputtype,
help=(
"Where SPEC is <collection_id>,<file-pattern>; file-pattern is a shell style "
"glob pattern. Each file found matching file-pattern will be assigned a collection "
"id of collection. One or more items must be specified."),
)
args = parser.parse_args()

print(args.input)

logging.basicConfig(level=logging.INFO, format="%(name)s -- %(message)s")
LOG.setLevel(logging.DEBUG if args.verbose else logging.INFO)

catalog = globals()[f"generate_{args.type}"](
args.collection_id,
Path(args.file_pattern).absolute(),
args.input,
basedir=Path(".").absolute(),
)
if args.verbose:
Expand Down