diff --git a/Archive/DatasetReader.py b/Archive/DatasetReader.py index 2a727ec..3160c2f 100644 --- a/Archive/DatasetReader.py +++ b/Archive/DatasetReader.py @@ -245,14 +245,14 @@ def convertOutputsToNumpyArrays(self, included_landcover_values, data, bandsToKe """ Description: Converts the dictionary or list of dictionaries supplied by the output - of TripletReader.read_all() or TripletReader.read_next() to numpy arrays of + of TripletReader.getAlignedData_composites() to numpy arrays of inputs and labels used to feed a random forest model. Inputs: self - Instance of this class included_landcover_values - List of integers - Omits pixels that do not contain a landcover value that falls in the list bandsToKeep - List of strings - Band names we want to keep as features data - list of dictionaries - Format is present in class description of TripletReader.read_all() or TripletReader.read_next() + Format is present in class description of TripletReader.getAlignedData_composites() classBalanceFilter - None, or Dictionary that includes counts of how many of each type of label should be present EX: { diff --git a/Src/Modules/FileTripletReader.py b/Src/Modules/FileTripletReader.py index 95dbd6c..6027da0 100644 --- a/Src/Modules/FileTripletReader.py +++ b/Src/Modules/FileTripletReader.py @@ -37,106 +37,16 @@ from shapely.geometry import Polygon from rasterio.features import geometry_mask import rasterio.mask +import xarray as xr +import rioxarray +from geocube.api.core import make_geocube # Custom modules sys.path.insert(0, '../src/modules') import HelperFunctions -import kmlReader -import TifReader - -############################################################################### -############################################################################### -############################################################################### -############################################################################### -def getAlignedData_RGB_basemaps(landcoverPath, satellitePath, kmlPath): - """ - Description: - Reads in a kml annotations tif file, a landcover tif file, and a satellite tif file - and then returns aligned numpy arrays of the landcover data, masked kml overlap, and sat data - Inputs: - self - Instance of this class - landcoverPath - - satellitePath - - kmlPath - - Output: - -1 if files dont overlap - - If everything works, - Dictionary of format - { - bands: - { - R : np.ndarray of size (N,m) <---- This NxM size can be different for different files - G : np.ndarray of size (N,m) - B : np.ndarray of size (N,m) - Landcover : np.ndarray of size (N,m) - Annotations: np,ndarry of size (N,m) - } - } - """ - #------------------------------------------------------------------------ - # KML Annotations file - KML_READER = kmlReader.KML_Annotations_Reader() - EPSG32610_polygons = KML_READER.get_EPSG32610_coordinates(kmlPath) - - # Get bounding box of polygons - xmin = 999999999999999 - xmax = -999999999999999 - ymin = 999999999999999 - ymax = -999999999999999 - for poly in EPSG32610_polygons: - for coords in poly: - x = coords[0] - y = coords[1] - if(x < xmin): xmin = x - elif(x > xmax): xmax = x - if(y < ymin): ymin = y - elif(y > ymax): ymax = y - - # Get shapely polygons - shapely_polygons = [Polygon(coords) for coords in EPSG32610_polygons] - gdf = gpd.GeoDataFrame(geometry=shapely_polygons, crs='EPSG:32610') - bbox = shapely.geometry.box(xmin, ymin, xmax, ymax) - - # First one is smaller one, second is bigger one (in terms of shape) - with rasterio.open(satellitePath, "r") as sat_ras, rasterio.open(landcoverPath) as lc_ras: # - - # Get mask data for satellite data/kml - mask, out_transform, win = rasterio.mask.raster_geometry_mask(sat_ras, shapely_polygons, crop=True) - mask = np.where(mask == True, -1, mask) #Convert TRUE, FALSE to 0, 1 - mask = np.where(mask == False, 1, mask) - mask = np.where(mask == -1, 0, mask) - - # Get mask data for satellite data/kml (We should think about adding better error handling here) - try: - mask2, out_transform2, win2 = rasterio.mask.raster_geometry_mask(lc_ras, shapely_polygons, crop=True) - mask2 = np.where(mask2 == True, -1, mask2) #Convert TRUE, FALSE to 0, 1 - mask2 = np.where(mask2 == False, 1, mask2) - mask2 = np.where(mask2 == -1, 0, mask2) - except: - print("Landcover file does not overlap with hand annotation file, skipping") - return -1 - - # Scale the sat/landcover data relative to the kml file - x_scale_sat = sat_ras.transform.a / out_transform.a - y_scale_sat = sat_ras.transform.e / out_transform.e - x_scale_lc = lc_ras.transform.a / out_transform2.a - y_scale_lc = lc_ras.transform.e / out_transform2.e - - # Get the overlap masks using windows and shape scaling - overlap_1 = sat_ras.read(window=win,out_shape=(sat_ras.count,int(win.height * y_scale_sat),int(win.width * x_scale_sat)), - resampling=Resampling.bilinear - ) - overlap_2 = lc_ras.read(window=win2,out_shape=(lc_ras.count, int(win.height * y_scale_sat),int(win.width * x_scale_sat)), - resampling=Resampling.bilinear - ) - - sat_ras.close() - lc_ras.close() - - bands = {"R": overlap_1[0], "G": overlap_1[1], "B": overlap_1[2], "Landcover": overlap_2[0], "Annotations": mask} - outputData = {"bands": bands} - return outputData +# Required to reaed kml files through geopandas +from fiona.drvsupport import supported_drivers +supported_drivers['LIBKML'] = 'rw' ############################################################################### ############################################################################### @@ -171,79 +81,92 @@ def getAlignedData_composites(landcoverPath, satellitePath, kmlPath): } """ #------------------------------------------------------------------------ - # KML Annotations file - KML_READER = kmlReader.KML_Annotations_Reader() - EPSG32610_polygons = KML_READER.get_EPSG32610_coordinates(kmlPath) + # Example test paths + # kmlPath = "/home/mariehoeger/Downloads/Annotations/5f383bac-d30d-4fde-b3ef-65c7c95baa75/5f383bac-d30d-4fde-b3ef-65c7c95baa75.kml" + # satellite_file_path = "/home/mariehoeger/Downloads/spectral_indices_3_2019.tif" + # landcover_file_path = "/home/mariehoeger/Downloads/landcover_2019 (1) (1)/LCMS_CONUS_v2022-8_2019.Land_Cover.tif" + #------------------------------------------------------------------------ + satellite_crs = HelperFunctions.get_raster_crs(satellitePath) + try: + annotations = gpd.read_file(kmlPath) + except: + gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw' + annotations = gpd.read_file(kmlPath) + annotations.to_crs(crs=satellite_crs, inplace=True) + annotations["Annotations"] = 0 - # Get bounding box of polygons - xmin = 999999999999999 - xmax = -999999999999999 - ymin = 999999999999999 - ymax = -999999999999999 - for poly in EPSG32610_polygons: - for coords in poly: - x = coords[0] - y = coords[1] - if(x < xmin): xmin = x - elif(x > xmax): xmax = x - if(y < ymin): ymin = y - elif(y > ymax): ymax = y + # Get total bounds of annotations + xmin = min(annotations.bounds.minx) + ymin = min(annotations.bounds.miny) + xmax = max(annotations.bounds.maxx) + ymax = max(annotations.bounds.maxy) - # Get shapely polygons - shapely_polygons = [Polygon(coords) for coords in EPSG32610_polygons] - gdf = gpd.GeoDataFrame(geometry=shapely_polygons, crs='EPSG:32610') bbox = shapely.geometry.box(xmin, ymin, xmax, ymax) - # First one is smaller one, second is bigger one (in terms of shape) - with rasterio.open(satellitePath, "r") as sat_ras, rasterio.open(landcoverPath) as lc_ras: # - # Get mask data for satellite data/kml - try: - mask, out_transform, win = rasterio.mask.raster_geometry_mask(sat_ras, shapely_polygons, crop=True) - mask = np.where(mask == True, -1, mask) #Convert TRUE, FALSE to 0, 1 - mask = np.where(mask == False, 1, mask) - mask = np.where(mask == -1, 0, mask) - except Exception as error: - print(f"Error: {error}, {satellitePath} does not overlap with {kmlPath}, skipping") - return -1 - - # Get mask data for satellite data/kml (We should think about adding better error handling here) - try: - mask2, out_transform2, win2 = rasterio.mask.raster_geometry_mask(lc_ras, shapely_polygons, crop=True) - mask2 = np.where(mask2 == True, -1, mask2) #Convert TRUE, FALSE to 0, 1 - mask2 = np.where(mask2 == False, 1, mask2) - mask2 = np.where(mask2 == -1, 0, mask2) - except: - print(f"{landcoverPath} does not overlap with {kmlPath}, skipping") - return -1 - - # Scale the sat/landcover data relative to the kml file - x_scale_sat = sat_ras.transform.a / out_transform.a - y_scale_sat = sat_ras.transform.e / out_transform.e - x_scale_lc = lc_ras.transform.a / out_transform2.a - y_scale_lc = lc_ras.transform.e / out_transform2.e - - # Get the overlap masks using windows and shape scaling - overlap_1 = sat_ras.read(window=win,out_shape=(sat_ras.count,int(win.height * y_scale_sat),int(win.width * x_scale_sat)), - resampling=Resampling.bilinear - ) - overlap_2 = lc_ras.read(window=win2,out_shape=(lc_ras.count, int(win.height * y_scale_sat),int(win.width * x_scale_sat)), - resampling=Resampling.bilinear - ) - - sat_ras.close() - lc_ras.close() - bands = { - "R": overlap_1[0], - "G": overlap_1[1], - "B": overlap_1[2], - "NIR": overlap_1[3], - "NDVI": overlap_1[4], - "GNDVI": overlap_1[5], - "RGIndex": overlap_1[6], - "Landcover": overlap_2[0], - "Annotations": mask + sat_ras = rioxarray.open_rasterio(satellitePath) + lc_ras = rioxarray.open_rasterio(landcoverPath) + + if sat_ras.rio.crs != lc_ras.rio.crs: + print("Landcover and satellite data do not have the same CRS, reprojecting landcover data to match satellite data") + + # we reproject anyways to make sure the grids align + lc_ras = lc_ras.rio.reproject_match(sat_ras) + + # get lc_ras bbox + lc_ras_bbox = shapely.geometry.box(*lc_ras.rio.bounds()) + # get sat_ras bbox + sat_ras_bbox = shapely.geometry.box(*sat_ras.rio.bounds()) + + # Make sure annotations overlap with both rasters + if not all(lc_ras_bbox.contains(annotations.geometry)): + print(f"{landcoverPath} does not overlap with {kmlPath}, skipping") + return -1 + + if not all(sat_ras_bbox.contains(annotations.geometry)): + print(f"{satellitePath} does not overlap with {kmlPath}, skipping") + return -1 + + # Clip to bbox of annotations + lc_ras = lc_ras.rio.clip([bbox]) + sat_ras = sat_ras.rio.clip([bbox]) + + # Remame bands + sat_ras["band"] = ["R", "G", "B", "NIR", "NDVI", "GNDVI", "RGIndex"] + lc_ras["band"] = ["Landcover"] + + # Make annotations vector data into raster data + annotation_raster = make_geocube( + vector_data=annotations, + measurements=["Annotations"], + fill=1, + like=sat_ras, # ensure the data are on the same grid + ) + annotation_raster = annotation_raster.to_array(dim="band") + + # This is an xarray object that is easier to work with. It likely + # makes sense to refactor downstream code to work directly off of + # final_data instead of the custom dictionary. + final_data = xr.concat( + [ + sat_ras, + annotation_raster, + lc_ras, + ], dim='band', + ) + + outputData = { + "bands": { + "R": final_data.sel(band="R").values, + "G": final_data.sel(band="G").values, + "B": final_data.sel(band="B").values, + "NIR": final_data.sel(band="NIR").values, + "NDVI": final_data.sel(band="NDVI").values, + "GNDVI": final_data.sel(band="GNDVI").values, + "RGIndex": final_data.sel(band="RGIndex").values, + "Landcover": final_data.sel(band="Landcover").values, + "Annotations": final_data.sel(band="Annotations").values, } - outputData = {"bands": bands} + } return outputData ############################################################################### @@ -254,7 +177,7 @@ def convertOutputsToNumpyArrays(included_landcover_values, data, bandsToKeep, cl """ Description: Converts the dictionary or list of dictionaries supplied by the output - of TripletReader.read_all() or TripletReader.read_next() to numpy arrays of + of TripletReader.getAlignedData_composites() to numpy arrays of inputs and labels used to feed a random forest model. IMPORTANT: Reads all data into memory Inputs: @@ -338,4 +261,4 @@ def convertOutputsToNumpyArrays(included_landcover_values, data, bandsToKeep, cl label_data = label_data[conditional_label_indices] # Apply conditional indices to get a subset of pixels that meet all the supplied conditions - return {"inputs": input_data, "labels": label_data, "class_balance": classBalance} \ No newline at end of file + return {"inputs": input_data, "labels": label_data, "class_balance": classBalance} diff --git a/Src/Modules/HelperFunctions.py b/Src/Modules/HelperFunctions.py index 70b8f60..75e4699 100644 --- a/Src/Modules/HelperFunctions.py +++ b/Src/Modules/HelperFunctions.py @@ -15,6 +15,7 @@ import tqdm import requests from pyproj import Transformer +import rasterio ############################################################################### ############################################################################### @@ -169,4 +170,20 @@ def convert_EPSG3857_to_CRS84(coordinates): # Flip these to get EPSG:4326 to CRS84 new = transformer.transform(coordinate[0], coordinate[1]) new_coords.append([new[1], new[0]]) - return new_coords \ No newline at end of file + return new_coords + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +def get_raster_crs(filepath: str): + """ + Description: + Returns the coordinate reference system of a raster file + Inputs: + filepath - String + Outputs: + String + """ + with rasterio.open(filepath, 'r') as src: + return src.crs.to_string() \ No newline at end of file diff --git a/Src/Modules/TifReader.py b/Src/Modules/TifReader.py index 1e4d201..d695515 100644 --- a/Src/Modules/TifReader.py +++ b/Src/Modules/TifReader.py @@ -12,6 +12,8 @@ import rasterio import rasterio.features import rasterio.warp +import rioxarray +import xarray as xr from rasterio.merge import merge import matplotlib.pyplot as plt from shapely.geometry import Point, Polygon, box @@ -210,7 +212,7 @@ def combine_tifs(input_files, output_file): ############################################################################### ############################################################################### ############################################################################### -def get_resampled_overlap_of_2tifs(Tif1path, Tif2path): +def get_resampled_overlap_of_2tifs(satellite_file_path, landcover_file_path): """ Description: Given two .tif files assumed to have the same @@ -228,43 +230,16 @@ def get_resampled_overlap_of_2tifs(Tif1path, Tif2path): Where X = Number of rows of the intersection between both rasters Where Y = Number of columns of the intersection between both rasters """ - # Get overlap windows between landcover and sat images - with rasterio.open(Tif1path) as largerRaster, rasterio.open(Tif2path) as smallerRaster: - - # Find pixel scale ratios between the two rasters - x_scale = smallerRaster.transform.a / largerRaster.transform.a - y_scale = smallerRaster.transform.e / largerRaster.transform.e + sat_ras = rioxarray.open_rasterio(satellite_file_path) + lc_ras = rioxarray.open_rasterio(landcover_file_path) - # scale image transform - transform = smallerRaster.transform * smallerRaster.transform.scale( - (smallerRaster.width / smallerRaster.shape[-1]), - (smallerRaster.height / smallerRaster.shape[-2]) - ) - - # Get extents of each file and then the intersection - ext1 = box(*largerRaster.bounds) # - ext2 = box(*smallerRaster.bounds) # - intersection = ext1.intersection(ext2) # - - # Get area in the 1st raster where both tifs overlap. Resolution remains the same. - win1 = rasterio.windows.from_bounds(*intersection.bounds, largerRaster.transform) # - - # Get area in the 2nd raster where both tifs overlap. Resolution changes to match resolution of 1st image. - win2 = rasterio.windows.from_bounds(*intersection.bounds, transform) # - - # Get numpy data - largerRaster_numpy = largerRaster.read(window=win1) - smallerRaster_numpy = smallerRaster.read( - window=win2, - out_shape=( - smallerRaster.count, - int(win2.height * y_scale), - int(win2.width * x_scale) - ), - resampling=Resampling.bilinear - ) + # we reproject anyways to make sure the grids align + lc_ras = lc_ras.rio.reproject_match(sat_ras) - # Close files, return data - largerRaster.close() - smallerRaster.close() - return (largerRaster_numpy, smallerRaster_numpy) \ No newline at end of file + lc_box = box(*lc_ras.rio.bounds()) + sat_box = box(*sat_ras.rio.bounds()) + intersection = lc_box.intersection(sat_box) + lc_ras = lc_ras.rio.clip([intersection]) + sat_ras = sat_ras.rio.clip([intersection]) + + return (sat_ras.values, lc_ras.values) \ No newline at end of file diff --git a/Src/Train_RF.py b/Src/Train_RF.py index d485723..cffce48 100644 --- a/Src/Train_RF.py +++ b/Src/Train_RF.py @@ -35,7 +35,7 @@ class or the 'non red-tree' class. All non-tree Model.pkl <- Where random forest is saved Metadata.json <- Extra user defined context into the model Notes: - ASSUMES ALL DATA HAS A COORDINATE REFERENCE SYSTEM OF EPSG:3857 + ASSUMES ALL DATA CAN BE PROJECTED TO SAME COORDINATE REFERENCE SYSTEM Land_Cover Class Table: 1 #005e00 Trees 2 #008000 Tall Shrubs & Trees Mix (SEAK Only)