Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
cf0130a
Fix syntax error
ewquon Sep 3, 2025
61543c2
Start with some reorg
ewquon Aug 28, 2025
e57c7e4
Remove duplicated code, create utils.microphysics
ewquon Aug 28, 2025
4bdf44c
Remove duplicate code, create utils.projection
ewquon Aug 28, 2025
0f7c10e
Use erf.constants module, which is consistent with ERF
ewquon Sep 3, 2025
3a4fbfa
Move EOS to utils (to mimic ERF source)
ewquon Sep 4, 2025
46dd51c
Remove duplicated code, create utils.latlon
ewquon Sep 4, 2025
e6bb8eb
Remove duplicated code; create io.simple
ewquon Sep 4, 2025
e4dca15
Remove duplicated code, create io.vtk
ewquon Sep 4, 2025
c976f84
Create generalized write_binary_structured_vtk and wrappers
ewquon Sep 4, 2025
7ea7c84
Update deps/versions, add pyproject.toml, update README
ewquon Sep 4, 2025
04da135
Add install instructions
ewquon Aug 28, 2025
da6b71b
Update deps
ewquon Sep 4, 2025
30cb446
Cleanup, add option to select GFS data product
ewquon Sep 4, 2025
caeb7ec
Cleanup
ewquon Sep 4, 2025
33cc1c1
Get rid of wildcard import
ewquon Sep 4, 2025
8d2151b
Don't download if local gribfile found
ewquon Sep 4, 2025
977786e
Cleanup; clarify era5 nz levels
ewquon Sep 4, 2025
5fc7491
Remove many duplicated code snippets, add create_lcc_mapping
ewquon Sep 4, 2025
6631d8a
Get rid of another wildcard import; cleanup
ewquon Sep 4, 2025
70b844c
Cleanup, slightly generalize write_binary_simple_erf
ewquon Sep 4, 2025
f2a012e
Create subpackage for common data; remove duplicated plot_1d; fix whi…
ewquon Sep 4, 2025
4feb869
Update gitignore
ewquon Sep 4, 2025
f565688
Remove more duplicated code, create utils.map
ewquon Sep 4, 2025
6a2b683
Cleanup, generalize, reorg functions related to write_US_map_vtk
ewquon Sep 4, 2025
37d1c1d
Replace WriteUSMapLambertProj with automatically generated script
ewquon Sep 4, 2025
ec460ce
Replace WriteNOAAForecastCone with more general write_map_region_vtk
ewquon Sep 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Mac
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down Expand Up @@ -127,3 +130,12 @@ dmypy.json

# Pyre type checker
.pyre/

# Downloads
*.grib
*.grib2

# Outputs
*.vtk
*.bin
*.png
31 changes: 26 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
# ERFtools
A collection of scripts for facilitating the usage of ERF.
# ERF Tools
A collection of Python-based modules and scripts for facilitating the usage of ERF.

## Installation

Clone and create a conda (or mamba) environment:
```shell
git clone https://github.com/erf-model/erftools.git
cd erftools
conda create -n erftools python=3.9
conda activate erftools
```

Then, install with:
```shell
pip install -e . # editable install
```
or install with all optional dependencies:
```shell
pip install -e .[all]
```

## Examples

Expand Down Expand Up @@ -31,6 +50,8 @@ print(log.ds) # data are stored in an xarray dataset

Some notes and recommendations:

* An aspirational goal is to contribute code that can be used as in the examples above, with clear, intuitive naming and following PEP-8 style as a set of guidelines rather than gospel.
* To avoid duplication, model constants are defined in `erf.constants`, which should replicate `ERF/Source/ERF_Constants.H`.
* In the same vein, equation of state evaluations are defined in `erf.EOS`, which should replicate `ERF/Source/Utils/ERF_EOS.H`.
* An aspirational goal is to contribute code that can be used as in the examples above, with clear, intuitive naming.
* To avoid duplication, model constants are defined in `erftools.constants`, which should replicate `ERF/Source/ERF_Constants.H`.
* In the same vein, equation of state evaluations are defined in `erftools.utils.EOS`, which should replicate `ERF/Source/Utils/ERF_EOS.H`.
* Other utilities for calculating/deriving/diagnosing quantities of interest are also in `erftools.utils.*`
* Please follow PEP-8 style--as a set of guidelines rather than gospel--to facilitate code usage and maintenance by the community.
2 changes: 1 addition & 1 deletion erftools/HSE.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np

from .constants import R_d, Cp_d, CONST_GRAV
from .EOS import getRhogivenThetaPress
from .utils.EOS import getRhogivenThetaPress

def Newton_Raphson_hse(p,F,dz,C,T,qv,
qt=None,
Expand Down
Empty file added erftools/data/__init__.py
Empty file.
Empty file.
2 changes: 1 addition & 1 deletion erftools/input_sounding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from .constants import R_d, Cp_d, Gamma, CONST_GRAV, p_0
from .wrf.constants import rvovrd, cvpm
from .EOS import getRhogivenThetaPress
from .utils.EOS import getRhogivenThetaPress
from .HSE import Newton_Raphson_hse


Expand Down
4 changes: 4 additions & 0 deletions erftools/io/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .simple import write_binary_simple_erf
from .vtk import (write_binary_vtk_on_native_grid,
write_binary_vtk_on_cartesian_grid,
write_vtk_map)
49 changes: 49 additions & 0 deletions erftools/io/simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import numpy as np
import struct

def write_binary_simple_erf(output_binary,
                            x_grid, y_grid, z_grid,
                            lat_erf=None, lon_erf=None,
                            point_data=None):
    """Write grid coordinates and optional point data to a simple
    binary file (native byte order, 32-bit floats) for ERF.

    File layout: a 4-int header (ncol, nrow, nz, n_vars), optional
    lat/lon planes, the 1-D x, y, and (layer-averaged) z coordinates,
    then each point-data field in k-major order.

    Parameters
    ----------
    output_binary : str
        Path of the binary file to write.
    x_grid, y_grid : array-like
        2D coordinate arrays with shape (nrow, ncol); x varies along
        columns and y along rows.
    z_grid : numpy.ndarray
        3D array of elevations; the horizontal mean of each k-level is
        written as the 1-D z coordinate.
    lat_erf, lon_erf : numpy.ndarray, optional
        3D arrays indexed [i, j, k]; only the k=0 plane is written.
    point_data : dict, optional
        Mapping of field name to 3D array indexed [i, j, k].
    """
    x_grid = np.asarray(x_grid)
    y_grid = np.asarray(y_grid)
    z_grid = np.asarray(z_grid)

    nrow, ncol = x_grid.shape
    nz = z_grid.shape[2]

    # BUGFIX: previously wrote len(point_data), which raised TypeError
    # when point_data was left as the default None
    nvars = len(point_data) if point_data else 0

    # TODO: rewrite without nested loops for efficiency?
    with open(output_binary, "wb") as file:
        file.write(struct.pack('iiii', ncol, nrow, nz, nvars))

        if lat_erf is not None:
            for j in range(nrow):  # Iterate over the y-dimension
                for i in range(ncol):  # Iterate over the x-dimension
                    file.write(struct.pack('f', lat_erf[i,j,0]))

        if lon_erf is not None:
            for j in range(nrow):  # Iterate over the y-dimension
                for i in range(ncol):  # Iterate over the x-dimension
                    file.write(struct.pack('f', lon_erf[i,j,0]))

        # Write grid points using a nested for loop
        for i in range(ncol):
            x = x_grid[0, i]
            file.write(struct.pack('f', x))

        for j in range(nrow):
            y = y_grid[j, 0]
            file.write(struct.pack('f', y))

        for k in range(nz):
            # collapse terrain-following levels to a single average height
            zavg = np.mean(z_grid[:,:,k])
            file.write(struct.pack('f', zavg))

        # Write point data (if any)
        if point_data:
            for name, data in point_data.items():
                for k in range(nz):  # Iterate over the z-dimension
                    for j in range(nrow):  # Iterate over the y-dimension
                        for i in range(ncol):  # Iterate over the x-dimension
                            value = data[i, j, k]
                            file.write(struct.pack('f', value))
229 changes: 229 additions & 0 deletions erftools/io/vtk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
import numpy as np
import struct


def write_binary_structured_vtk(filename,
                                x_grid, y_grid, z_grid,
                                point_data=None,
                                velocity=None,
                                indexing='xy',
                                k_to_delete=None,
                                mirrored_x=False,
                                top_down=False,
                                zoffset=0.0,
                                skip_latlon=False):
    """
    General function to write a binary VTK file on a structured grid.

    Point coordinates are written big-endian (as required by legacy
    binary VTK); each output z-level is the horizontal mean of
    ``z_grid[:,:,k]`` plus ``zoffset``.

    Arguments
    ---------
    filename : str
        Name of the VTK file to write.
    x_grid, y_grid, z_grid : numpy.ndarray
        2D, 2D, and 3D arrays of grid coordinates, respectively.
    point_data : dict, optional
        Dictionary of output scalar data associated with the grid points.
    velocity : numpy.ndarray, optional
        4D array (nx,ny,nz,3) of output velocity data.
    indexing : {'xy', 'ij'}, optional
        Input grid indexing, following numpy naming convention
    k_to_delete : list, optional
        Indices of vertical levels to exclude (default: none).
    mirrored_x : bool, optional
        Output has the first index reversed.
    top_down : bool, optional
        Output has the last index reversed.
    zoffset : float, optional
        Offset to apply to output grid in z direction.
    skip_latlon : bool, optional
        If point_data are given, skip writing 'latitude' and
        'longitude' fields.
    """
    assert indexing in ('xy','ij')  # Cartesian or matrix (row,col)

    # BUGFIX: avoid mutable default argument
    if k_to_delete is None:
        k_to_delete = []

    x_grid = np.asarray(x_grid)
    y_grid = np.asarray(y_grid)

    # Get grid extents
    if indexing == 'xy':
        nx, ny = x_grid.shape
    elif indexing == 'ij':
        ny, nx = x_grid.shape
    nz = z_grid.shape[2]
    print('nx, ny, nz =', nx, ny, nz)

    nzval = nz - len(k_to_delete)

    # Setup loop indices
    irange = range(nx-1,-1,-1) if mirrored_x else range(nx)
    krange = range(nz-1,-1,-1) if top_down else range(nz)

    with open(filename, 'wb') as f:
        # Write the VTK header
        f.write(b'# vtk DataFile Version 3.0\n')
        f.write(b'Generated by Python script\n')
        f.write(b'BINARY\n')
        f.write(b'DATASET STRUCTURED_GRID\n')
        f.write(f'DIMENSIONS {nx} {ny} {nzval}\n'.encode())
        f.write(f'POINTS {nx * ny * nzval} float\n'.encode())

        # TODO: rewrite without nested loops for efficiency?

        # Write grid points using a nested for loop
        # (note: point coordinates intentionally ignore mirrored_x)
        for k in krange:  # loop may be reversed
            if k in k_to_delete:  # BUGFIX: removed leftover debug print
                continue

            z = np.mean(z_grid[:, :, k]) + zoffset

            if indexing == 'xy':
                for j in range(ny):
                    for i in range(nx):
                        x = x_grid[i, j]
                        y = y_grid[i, j]
                        f.write(struct.pack('>fff', x, y, z))
            elif indexing == 'ij':
                for j in range(ny):
                    for i in range(nx):
                        x = x_grid[j, i]  # indices flipped
                        y = y_grid[j, i]
                        f.write(struct.pack('>fff', x, y, z))

        # Write point data (if any)
        if point_data is not None:
            f.write(f'POINT_DATA {nx * ny * nzval}\n'.encode())
            for name, data in point_data.items():
                if skip_latlon and (name == "latitude" or name=="longitude"):
                    continue
                f.write(f'SCALARS {name} float 1\n'.encode())
                f.write(b'LOOKUP_TABLE default\n')
                for k in krange:  # Iterate over the z-dimension (may be reversed)
                    if k in k_to_delete:
                        continue
                    for j in range(ny):  # Iterate over the y-dimension
                        for i in irange:  # Iterate over the x-dimension (may be reversed)
                            value = data[i, j, k]
                            f.write(struct.pack('>f', value))

        # Write velocity vector field
        if velocity is not None:
            f.write("VECTORS velocity float\n".encode())
            for k in krange:  # may be reversed
                if k in k_to_delete:
                    continue
                for j in range(ny):
                    for i in irange:  # may be reversed
                        vx, vy, vz = velocity[i, j, k]
                        f.write(struct.pack('>fff', vx, vy, vz))


# formerly `write_binary_vtk_structured_grid`
def write_binary_vtk_on_native_grid(filename,
                                    x_grid, y_grid, z_grid,
                                    k_to_delete=None,
                                    point_data=None,
                                    velocity=None,
                                    **kwargs):
    """Write a binary structured-grid VTK file on the native grid:
    Cartesian ('xy') indexing with the x and z output axes reversed.
    Any 'latitude'/'longitude' fields in point_data are dropped.
    """
    # BUGFIX: avoid mutable default argument (was k_to_delete=[])
    if k_to_delete is None:
        k_to_delete = []
    if point_data is not None:
        # don't write out lat,lon
        point_data = {
            k:v for k,v in point_data.items()
            if k not in ['latitude','longitude']
        }
    return write_binary_structured_vtk(
        filename,
        x_grid, y_grid, z_grid,
        point_data=point_data,
        velocity=velocity,
        indexing='xy',
        k_to_delete=k_to_delete,
        mirrored_x=True,
        top_down=True,
        **kwargs)

# formerly `write_binary_vtk_cartesian_file`
def write_binary_vtk_on_cartesian_grid(filename,
                                       x_grid, y_grid, z_grid,
                                       point_data=None,
                                       **kwargs):
    """Write a binary structured-grid VTK file on a Cartesian grid.

    Thin wrapper around write_binary_structured_vtk that selects
    matrix ('ij') indexing and leaves both the x and z output axes
    in their natural order. Extra keyword arguments are forwarded.
    """
    return write_binary_structured_vtk(filename,
                                       x_grid,
                                       y_grid,
                                       z_grid,
                                       point_data=point_data,
                                       indexing='ij',
                                       mirrored_x=False,
                                       top_down=False,
                                       **kwargs)


def write_vtk_map(x, y, ids, filename, zlo=1e-12,
                  point_data=None):
    """
    Write an ASCII polydata VTK file containing borders for specified
    regions.

    Points sharing a region id are connected in input order by line
    segments; regions are processed in sorted-id order.

    Parameters:
        x (list or ndarray): x coordinates
        y (list or ndarray): y coordinates
        ids (list or ndarray): state/country/region indices
                               corresponding to each (x, y)
        filename (str): Name of the output VTK file.
        zlo (float): constant elevation value
        point_data (dict): scalar data associated with points, optional
    """
    x = np.asarray(x)
    y = np.asarray(y)
    ids = np.asarray(ids)

    if len(x) != len(y) or len(x) != len(ids):
        raise ValueError("The length of x, y, and ids must be the same.")

    with open(filename, 'w') as vtk_file:
        # Legacy ASCII polydata header
        vtk_file.write("# vtk DataFile Version 3.0\n")
        vtk_file.write("State borders\n")
        vtk_file.write("ASCII\n")
        vtk_file.write("DATASET POLYDATA\n")

        # Accumulate all points and the segments that connect
        # consecutive points within each region
        all_points = []
        segments = []
        for region_id in np.unique(ids):
            sel = np.where(ids == region_id)[0]
            offset = len(all_points)  # index of this region's first point
            all_points.extend(zip(x[sel], y[sel]))
            segments.extend((offset + n, offset + n + 1)
                            for n in range(len(sel) - 1))

        # Point coordinates at constant elevation zlo
        vtk_file.write(f"POINTS {len(all_points)} float\n")
        for px, py in all_points:
            vtk_file.write(f"{px} {py} {zlo}\n")

        # Two-point line cells
        vtk_file.write(f"LINES {len(segments)} {3 * len(segments)}\n")
        for p1, p2 in segments:
            vtk_file.write(f"2 {p1} {p2}\n")

        # Optional scalar fields, one value per point
        if point_data is not None:
            vtk_file.write(f"POINT_DATA {len(all_points)}\n")

            for name, values in point_data.items():
                assert len(values) == len(all_points)
                vtk_file.write(f"SCALARS {name} double\n")
                vtk_file.write("LOOKUP_TABLE default\n")
                vtk_file.write('\n'.join(f"{val:.14g}" for val in values)
                               + '\n')
Loading