Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Archive/DatasetReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,14 @@ def convertOutputsToNumpyArrays(self, included_landcover_values, data, bandsToKe
"""
Description:
Converts the dictionary or list of dictionaries supplied by the output
of TripletReader.read_all() or TripletReader.read_next() to numpy arrays of
of TripletReader.getAlignedData_composites() to numpy arrays of
inputs and labels used to feed a random forest model.
Inputs:
self - Instance of this class
included_landcover_values - List of integers - Omits pixels that do not contain a landcover value that falls in the list
bandsToKeep - List of strings - Band names we want to keep as features
data - list of dictionaries
Format is present in class description of TripletReader.read_all() or TripletReader.read_next()
Format is present in class description of TripletReader.getAlignedData_composites()
classBalanceFilter - None, or Dictionary that includes counts of how many of each type of label should be present
EX:
{
Expand Down
255 changes: 89 additions & 166 deletions Src/Modules/FileTripletReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,106 +37,16 @@
from shapely.geometry import Polygon
from rasterio.features import geometry_mask
import rasterio.mask
import xarray as xr
import rioxarray
from geocube.api.core import make_geocube

# Custom modules
sys.path.insert(0, '../src/modules')
import HelperFunctions
import kmlReader
import TifReader

###############################################################################
###############################################################################
###############################################################################
###############################################################################
def getAlignedData_RGB_basemaps(landcoverPath, satellitePath, kmlPath):
"""
Description:
Reads in a kml annotations tif file, a landcover tif file, and a satellite tif file
and then returns aligned numpy arrays of the landcover data, masked kml overlap, and sat data
Inputs:
self - Instance of this class
landcoverPath -
satellitePath -
kmlPath -
Output:
-1 if files dont overlap

If everything works,
Dictionary of format
{
bands:
{
R : np.ndarray of size (N,m) <---- This NxM size can be different for different files
G : np.ndarray of size (N,m)
B : np.ndarray of size (N,m)
Landcover : np.ndarray of size (N,m)
Annotations: np,ndarry of size (N,m)
}
}
"""
#------------------------------------------------------------------------
# KML Annotations file
KML_READER = kmlReader.KML_Annotations_Reader()
EPSG32610_polygons = KML_READER.get_EPSG32610_coordinates(kmlPath)

# Get bounding box of polygons
xmin = 999999999999999
xmax = -999999999999999
ymin = 999999999999999
ymax = -999999999999999
for poly in EPSG32610_polygons:
for coords in poly:
x = coords[0]
y = coords[1]
if(x < xmin): xmin = x
elif(x > xmax): xmax = x
if(y < ymin): ymin = y
elif(y > ymax): ymax = y

# Get shapely polygons
shapely_polygons = [Polygon(coords) for coords in EPSG32610_polygons]
gdf = gpd.GeoDataFrame(geometry=shapely_polygons, crs='EPSG:32610')
bbox = shapely.geometry.box(xmin, ymin, xmax, ymax)

# First one is smaller one, second is bigger one (in terms of shape)
with rasterio.open(satellitePath, "r") as sat_ras, rasterio.open(landcoverPath) as lc_ras: #<class 'rasterio.io.DatasetReader'>

# Get mask data for satellite data/kml
mask, out_transform, win = rasterio.mask.raster_geometry_mask(sat_ras, shapely_polygons, crop=True)
mask = np.where(mask == True, -1, mask) #Convert TRUE, FALSE to 0, 1
mask = np.where(mask == False, 1, mask)
mask = np.where(mask == -1, 0, mask)

# Get mask data for satellite data/kml (We should think about adding better error handling here)
try:
mask2, out_transform2, win2 = rasterio.mask.raster_geometry_mask(lc_ras, shapely_polygons, crop=True)
mask2 = np.where(mask2 == True, -1, mask2) #Convert TRUE, FALSE to 0, 1
mask2 = np.where(mask2 == False, 1, mask2)
mask2 = np.where(mask2 == -1, 0, mask2)
except:
print("Landcover file does not overlap with hand annotation file, skipping")
return -1

# Scale the sat/landcover data relative to the kml file
x_scale_sat = sat_ras.transform.a / out_transform.a
y_scale_sat = sat_ras.transform.e / out_transform.e
x_scale_lc = lc_ras.transform.a / out_transform2.a
y_scale_lc = lc_ras.transform.e / out_transform2.e

# Get the overlap masks using windows and shape scaling
overlap_1 = sat_ras.read(window=win,out_shape=(sat_ras.count,int(win.height * y_scale_sat),int(win.width * x_scale_sat)),
resampling=Resampling.bilinear
)
overlap_2 = lc_ras.read(window=win2,out_shape=(lc_ras.count, int(win.height * y_scale_sat),int(win.width * x_scale_sat)),
resampling=Resampling.bilinear
)

sat_ras.close()
lc_ras.close()

bands = {"R": overlap_1[0], "G": overlap_1[1], "B": overlap_1[2], "Landcover": overlap_2[0], "Annotations": mask}
outputData = {"bands": bands}
return outputData
# Required to reaed kml files through geopandas
from fiona.drvsupport import supported_drivers
supported_drivers['LIBKML'] = 'rw'

###############################################################################
###############################################################################
Expand Down Expand Up @@ -171,79 +81,92 @@ def getAlignedData_composites(landcoverPath, satellitePath, kmlPath):
}
"""
#------------------------------------------------------------------------
# KML Annotations file
KML_READER = kmlReader.KML_Annotations_Reader()
EPSG32610_polygons = KML_READER.get_EPSG32610_coordinates(kmlPath)
# Example test paths
# kmlPath = "/home/mariehoeger/Downloads/Annotations/5f383bac-d30d-4fde-b3ef-65c7c95baa75/5f383bac-d30d-4fde-b3ef-65c7c95baa75.kml"
# satellite_file_path = "/home/mariehoeger/Downloads/spectral_indices_3_2019.tif"
# landcover_file_path = "/home/mariehoeger/Downloads/landcover_2019 (1) (1)/LCMS_CONUS_v2022-8_2019.Land_Cover.tif"
#------------------------------------------------------------------------
satellite_crs = HelperFunctions.get_raster_crs(satellitePath)
try:
annotations = gpd.read_file(kmlPath)
except:
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
annotations = gpd.read_file(kmlPath)
annotations.to_crs(crs=satellite_crs, inplace=True)
annotations["Annotations"] = 0

# Get bounding box of polygons
xmin = 999999999999999
xmax = -999999999999999
ymin = 999999999999999
ymax = -999999999999999
for poly in EPSG32610_polygons:
for coords in poly:
x = coords[0]
y = coords[1]
if(x < xmin): xmin = x
elif(x > xmax): xmax = x
if(y < ymin): ymin = y
elif(y > ymax): ymax = y
# Get total bounds of annotations
xmin = min(annotations.bounds.minx)
ymin = min(annotations.bounds.miny)
xmax = max(annotations.bounds.maxx)
ymax = max(annotations.bounds.maxy)

# Get shapely polygons
shapely_polygons = [Polygon(coords) for coords in EPSG32610_polygons]
gdf = gpd.GeoDataFrame(geometry=shapely_polygons, crs='EPSG:32610')
bbox = shapely.geometry.box(xmin, ymin, xmax, ymax)

# First one is smaller one, second is bigger one (in terms of shape)
with rasterio.open(satellitePath, "r") as sat_ras, rasterio.open(landcoverPath) as lc_ras: #<class 'rasterio.io.DatasetReader'>
# Get mask data for satellite data/kml
try:
mask, out_transform, win = rasterio.mask.raster_geometry_mask(sat_ras, shapely_polygons, crop=True)
mask = np.where(mask == True, -1, mask) #Convert TRUE, FALSE to 0, 1
mask = np.where(mask == False, 1, mask)
mask = np.where(mask == -1, 0, mask)
except Exception as error:
print(f"Error: {error}, {satellitePath} does not overlap with {kmlPath}, skipping")
return -1

# Get mask data for satellite data/kml (We should think about adding better error handling here)
try:
mask2, out_transform2, win2 = rasterio.mask.raster_geometry_mask(lc_ras, shapely_polygons, crop=True)
mask2 = np.where(mask2 == True, -1, mask2) #Convert TRUE, FALSE to 0, 1
mask2 = np.where(mask2 == False, 1, mask2)
mask2 = np.where(mask2 == -1, 0, mask2)
except:
print(f"{landcoverPath} does not overlap with {kmlPath}, skipping")
return -1

# Scale the sat/landcover data relative to the kml file
x_scale_sat = sat_ras.transform.a / out_transform.a
y_scale_sat = sat_ras.transform.e / out_transform.e
x_scale_lc = lc_ras.transform.a / out_transform2.a
y_scale_lc = lc_ras.transform.e / out_transform2.e

# Get the overlap masks using windows and shape scaling
overlap_1 = sat_ras.read(window=win,out_shape=(sat_ras.count,int(win.height * y_scale_sat),int(win.width * x_scale_sat)),
resampling=Resampling.bilinear
)
overlap_2 = lc_ras.read(window=win2,out_shape=(lc_ras.count, int(win.height * y_scale_sat),int(win.width * x_scale_sat)),
resampling=Resampling.bilinear
)

sat_ras.close()
lc_ras.close()
bands = {
"R": overlap_1[0],
"G": overlap_1[1],
"B": overlap_1[2],
"NIR": overlap_1[3],
"NDVI": overlap_1[4],
"GNDVI": overlap_1[5],
"RGIndex": overlap_1[6],
"Landcover": overlap_2[0],
"Annotations": mask
sat_ras = rioxarray.open_rasterio(satellitePath)
lc_ras = rioxarray.open_rasterio(landcoverPath)

if sat_ras.rio.crs != lc_ras.rio.crs:
print("Landcover and satellite data do not have the same CRS, reprojecting landcover data to match satellite data")

# we reproject anyways to make sure the grids align
lc_ras = lc_ras.rio.reproject_match(sat_ras)

# get lc_ras bbox
lc_ras_bbox = shapely.geometry.box(*lc_ras.rio.bounds())
# get sat_ras bbox
sat_ras_bbox = shapely.geometry.box(*sat_ras.rio.bounds())

# Make sure annotations overlap with both rasters
if not all(lc_ras_bbox.contains(annotations.geometry)):
print(f"{landcoverPath} does not overlap with {kmlPath}, skipping")
return -1

if not all(sat_ras_bbox.contains(annotations.geometry)):
print(f"{satellitePath} does not overlap with {kmlPath}, skipping")
return -1

# Clip to bbox of annotations
lc_ras = lc_ras.rio.clip([bbox])
sat_ras = sat_ras.rio.clip([bbox])

# Remame bands
sat_ras["band"] = ["R", "G", "B", "NIR", "NDVI", "GNDVI", "RGIndex"]
lc_ras["band"] = ["Landcover"]

# Make annotations vector data into raster data
annotation_raster = make_geocube(
vector_data=annotations,
measurements=["Annotations"],
fill=1,
like=sat_ras, # ensure the data are on the same grid
)
annotation_raster = annotation_raster.to_array(dim="band")

# This is an xarray object that is easier to work with. It likely
# makes sense to refactor downstream code to work directly off of
# final_data instead of the custom dictionary.
final_data = xr.concat(
[
sat_ras,
annotation_raster,
lc_ras,
], dim='band',
)

outputData = {
"bands": {
"R": final_data.sel(band="R").values,
"G": final_data.sel(band="G").values,
"B": final_data.sel(band="B").values,
"NIR": final_data.sel(band="NIR").values,
"NDVI": final_data.sel(band="NDVI").values,
"GNDVI": final_data.sel(band="GNDVI").values,
"RGIndex": final_data.sel(band="RGIndex").values,
"Landcover": final_data.sel(band="Landcover").values,
"Annotations": final_data.sel(band="Annotations").values,
}
outputData = {"bands": bands}
}
return outputData

###############################################################################
Expand All @@ -254,7 +177,7 @@ def convertOutputsToNumpyArrays(included_landcover_values, data, bandsToKeep, cl
"""
Description:
Converts the dictionary or list of dictionaries supplied by the output
of TripletReader.read_all() or TripletReader.read_next() to numpy arrays of
of TripletReader.getAlignedData_composites() to numpy arrays of
inputs and labels used to feed a random forest model.
IMPORTANT: Reads all data into memory
Inputs:
Expand Down Expand Up @@ -338,4 +261,4 @@ def convertOutputsToNumpyArrays(included_landcover_values, data, bandsToKeep, cl
label_data = label_data[conditional_label_indices]

# Apply conditional indices to get a subset of pixels that meet all the supplied conditions
return {"inputs": input_data, "labels": label_data, "class_balance": classBalance}
return {"inputs": input_data, "labels": label_data, "class_balance": classBalance}
19 changes: 18 additions & 1 deletion Src/Modules/HelperFunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import tqdm
import requests
from pyproj import Transformer
import rasterio

###############################################################################
###############################################################################
Expand Down Expand Up @@ -169,4 +170,20 @@ def convert_EPSG3857_to_CRS84(coordinates):
# Flip these to get EPSG:4326 to CRS84
new = transformer.transform(coordinate[0], coordinate[1])
new_coords.append([new[1], new[0]])
return new_coords
return new_coords

###############################################################################
###############################################################################
###############################################################################
###############################################################################
def get_raster_crs(filepath: str):
"""
Description:
Returns the coordinate reference system of a raster file
Inputs:
filepath - String
Outputs:
String
"""
with rasterio.open(filepath, 'r') as src:
return src.crs.to_string()
Loading