From f56f7d2f154ce897e98c6401a85bcedf6f0197ce Mon Sep 17 00:00:00 2001 From: "aaryan@fused.io" Date: Thu, 3 Jul 2025 18:17:31 +0000 Subject: [PATCH] test udf --- community/user/ov/README.MD | 28 +++++++ community/user/ov/meta.json | 150 ++++++++++++++++++++++++++++++++++++ community/user/ov/ov.py | 26 +++++++ community/user/ov/utils.py | 137 ++++++++++++++++++++++++++++++++ 4 files changed, 341 insertions(+) create mode 100644 community/user/ov/README.MD create mode 100644 community/user/ov/meta.json create mode 100644 community/user/ov/ov.py create mode 100644 community/user/ov/utils.py diff --git a/community/user/ov/README.MD b/community/user/ov/README.MD new file mode 100644 index 0000000..e8a34d7 --- /dev/null +++ b/community/user/ov/README.MD @@ -0,0 +1,28 @@ + + +

UDF preview image

+ + +Tags: `overture` `coop` + + +Building footprints, places of interest (POIs), admin boundaries, and transportation globally from [Overture Maps](https://overturemaps.org/). + +## Parameters + +- `release`: Overture release ID. Defaults to `2025-03-19-1`. Note that `.` should be replaced with `-` in the ID. +- `overture_type`: One of `infrastructure`, `land`, `land_use`, `land_cover`, `water`, `bathymetry`, `division`, `division_boundary`, `division_area`, `place`, `connector`, `segment`, `address`, `building` (default). +- `theme`: One of `buildings`, `base`, `places`, `transportation`, `divisions`, `addresses`. If not specified, this will be inferred from `overture_type`. +- `use_columns`: Load only these columns if specified. Default is to load all columns. + +## Run this in any Jupyter Notebook + +```python +import fused +import geopandas as gpd + +udf = fused.load("https://github.com/fusedio/udfs/tree/main/public/Overture_Maps_Example") +gdf_output = fused.run(udf, x=2622, y=6333, z=14) +gdf = gpd.GeoDataFrame(gdf_output, geometry='geometry', crs='epsg:4326') +gdf.plot() +``` diff --git a/community/user/ov/meta.json b/community/user/ov/meta.json new file mode 100644 index 0000000..21a17b7 --- /dev/null +++ b/community/user/ov/meta.json @@ -0,0 +1,150 @@ +{ + "version": "0.0.3", + "job_config": { + "version": "0.0.3", + "name": null, + "steps": [ + { + "type": "udf", + "udf": { + "type": "geopandas_v2", + "name": "ov", + "entrypoint": "udf", + "parameters": {}, + "metadata": { + "fused:defaultParameters": [ + { + "parameter": "release", + "value": "", + "type": "string", + "suggestedValues": [ + "2024-08-20-0", + "2024-09-18-0", + "2024-10-23-0", + "2024-11-13-0", + "2024-12-18-0", + "2025-01-22-0", + "2025-03-19-1" + ] + }, + { + "parameter": "theme", + "value": "", + "type": "string", + "suggestedValues": [ + "buildings", + "base", + "places", + "transportation", + "addresses", + "divisions" + ] + }, + { + "parameter": "overture_type", + "value": "", + "type": "string", + "suggestedValues": [ +
"land_use", + "water", + "place", + "connector", + "segment", + "building", + "address", + "infrastructure", + "land", + "division", + "division_area", + "division_boundary", + "land_cover", + "bathymetry" + ] + }, + { + "parameter": "use_columns", + "value": [], + "type": "array", + "suggestedValues": [ + "geometry", + "id", + "names", + "sources", + "update_time", + "categories" + ] + } + ], + "fused:vizConfig": { + "tileLayer": { + "@@type": "TileLayer", + "minZoom": 0, + "maxZoom": 19, + "tileSize": 256, + "pickable": true + }, + "rasterLayer": { + "@@type": "BitmapLayer" + }, + "vectorLayer": { + "@@type": "GeoJsonLayer", + "stroked": true, + "filled": false, + "pickable": true, + "lineWidthMinPixels": 1, + "getLineColor": { + "@@function": "hasProp", + "property": "r", + "present": "@@=[properties.r, properties.g, properties.b]", + "absent": [ + 200, + 250, + 0 + ] + }, + "getFillColor": [ + 255, + 0, + 0, + 40 + ] + } + }, + "fused:udfType": "vector_tile", + "fused:slug": "ov", + "fused:name": "ov", + "fused:defaultViewState": { + "enable": true, + "latitude": 51.50786698991621, + "longitude": -0.10589130924052784, + "zoom": 13.935948485009598, + "pitch": 0, + "bearing": 0 + }, + "fused:assetUrl": "https://fused-magic.s3.us-west-2.amazonaws.com/thumbnails/udfs-staging/Overture_Map_Example2.png", + "fused:tags": [ + { + "id": "overture", + "label": "overture" + }, + { + "id": "coop", + "label": "coop" + } + ], + "fused:description": "Buildings footprints, places of interest (POIs), admin boundaries, and transportation globally from [Overture Maps](https://overturemaps.org/).\n\n## Parameters\n\n- `release`: Overture release ID. Defaults to `2024-12-18-0`. 
# --- community/user/ov/ov.py ---
# NOTE(review): `fused` is not imported in either file; presumably the Fused
# runtime injects it into the UDF namespace -- confirm.
@fused.udf
def udf(
    bounds: fused.types.Bounds = None,
    release: str = "2025-03-19-1",
    theme: str = None,
    overture_type: str = None,
    use_columns: list = None,
    num_parts: int = None,
    min_zoom: int = None,
    polygon: str = None,
    point_convert: str = None,
):
    """UDF entry point: forward every argument unchanged to ``get_overture``."""
    from utils import get_overture

    return get_overture(
        bounds=bounds,
        release=release,
        theme=theme,
        overture_type=overture_type,
        use_columns=use_columns,
        num_parts=num_parts,
        min_zoom=min_zoom,
        polygon=polygon,
        point_convert=point_convert,
    )


# --- community/user/ov/utils.py ---
def get_overture(
    bounds: fused.types.Bounds = None,
    release: str = "2025-03-19-1",
    theme: str = None,
    overture_type: str = None,
    use_columns: list = None,
    num_parts: int = None,
    min_zoom: int = None,
    polygon: str = None,
    point_convert: str = None,
):
    """Return Overture data for the requested extent as a GeoDataFrame.

    Args:
        bounds: Query extent passed to ``table_to_tile``.
        release: Overture release ID; selects the table path and the
            type -> theme mapping used for inference.
        theme: Overture theme. Inferred from ``overture_type`` when omitted
            (falling back to ``"buildings"`` for unknown/absent types).
        overture_type: Overture type. Inferred from ``theme`` when omitted.
        use_columns: Forwarded to ``table_to_tile`` as ``use_columns``.
        num_parts: Number of ``part=<i>`` partitions to read. Defaults to 6
            (release >= ``2025-01-22-0``) or 5 for buildings, 1 otherwise.
        min_zoom: Forwarded to ``table_to_tile``. Defaults to 7 for
            admins/divisions, 9 for base, 12 otherwise.
        polygon: GeoJSON string parsed with ``GeoDataFrame.from_features``.
            When ``bounds`` is not given, the polygon's total bounds are
            used as the query extent. An explicit ``bounds`` takes
            precedence (unchanged from the previous behaviour).
        point_convert: Any non-``None`` value replaces each geometry with
            its centroid.

    Returns:
        The concatenated partitions, or ``None`` when no partition yielded
        data.

    Raises:
        KeyError: If ``overture_type`` is omitted and ``theme`` is not a
            known theme for the release.
    """
    import concurrent.futures
    import json
    import logging

    import geopandas as gpd
    import pandas as pd

    # Load pinned versions of utility functions.
    utils = fused.load("https://github.com/fusedio/udfs/tree/d0e8eb0/public/common/").utils

    if release == "2024-02-15-alpha-0":
        # This release uses camelCase type names; accept the snake_case
        # spellings as aliases.
        if overture_type == "administrative_boundary":
            overture_type = "administrativeBoundary"
        elif overture_type == "land_use":
            overture_type = "landUse"
        theme_per_type = {
            "building": "buildings",
            "administrativeBoundary": "admins",
            "place": "places",
            "landUse": "base",
            "water": "base",
            "segment": "transportation",
            "connector": "transportation",
        }
    elif release == "2024-03-12-alpha-0":
        theme_per_type = {
            "building": "buildings",
            "administrative_boundary": "admins",
            "place": "places",
            "land_use": "base",
            "water": "base",
            "segment": "transportation",
            "connector": "transportation",
        }
    else:
        theme_per_type = {
            "address": "addresses",
            "building": "buildings",
            "infrastructure": "base",
            "land": "base",
            "land_use": "base",
            "land_cover": "base",
            "water": "base",
            "bathymetry": "base",
            "place": "places",
            "division": "divisions",
            "division_area": "divisions",
            "division_boundary": "divisions",
            "segment": "transportation",
            "connector": "transportation",
        }

    if theme is None:
        theme = theme_per_type.get(overture_type, "buildings")

    if overture_type is None:
        # Inverting a many-to-one mapping: for themes with several types the
        # LAST type listed above wins (e.g. "base" -> "bathymetry").
        type_per_theme = {v: k for k, v in theme_per_type.items()}
        overture_type = type_per_theme[theme]

    if num_parts is None:
        if overture_type == "building":
            # Lexicographic comparison; valid because release IDs are
            # zero-padded dates.
            num_parts = 6 if release >= "2025-01-22-0" else 5
        else:
            num_parts = 1

    if min_zoom is None:
        if theme in ("admins", "divisions"):
            min_zoom = 7
        elif theme == "base":
            min_zoom = 9
        else:
            min_zoom = 12

    table_path = f"s3://us-west-2.opendata.source.coop/fused/overture/{release}/theme={theme}/type={overture_type}"
    table_path = table_path.rstrip("/")

    if polygon is not None:
        polygon = gpd.GeoDataFrame.from_features(json.loads(polygon))
        if bounds is None:
            # Previously this branch indexed `bounds` unconditionally and
            # crashed with TypeError when only a polygon was supplied.
            bounds = [float(v) for v in polygon.total_bounds]

    def get_part(part):
        part_path = f"{table_path}/part={part}/" if num_parts != 1 else table_path
        try:
            return utils.table_to_tile(
                bounds, table=part_path, use_columns=use_columns, min_zoom=min_zoom
            )
        except ValueError:
            # Best-effort: a failing partition is skipped rather than fatal.
            # NOTE(review): presumably raised when a partition has no data
            # for this extent -- confirm against table_to_tile.
            return None

    if num_parts > 1:
        with concurrent.futures.ThreadPoolExecutor(max_workers=num_parts) as pool:
            dfs = list(pool.map(get_part, range(num_parts)))
    else:
        # Don't bother creating a thread pool to do one thing
        dfs = [get_part(0)]

    dfs = [df for df in dfs if df is not None]

    if not dfs:
        # `logging.warn` was a deprecated alias removed in Python 3.13.
        logging.warning("Failed to get any data")
        return None

    gdf = pd.concat(dfs)

    if point_convert is not None:
        gdf["geometry"] = gdf.geometry.centroid

    return gdf