diff --git a/gedi/Pull_Clip_GEDI_Data.ipynb b/gedi/Pull_Clip_GEDI_Data.ipynb new file mode 100644 index 0000000..a446e52 --- /dev/null +++ b/gedi/Pull_Clip_GEDI_Data.ipynb @@ -0,0 +1,891 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Pull_Clip_GEDI_Data", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pqdnBNGwb-yS" + }, + "source": [ + "Authored by Andre Otte, July 2021.\n", + "\n", + "This script downloads and clips all GEDI data that intersects with an area of interest. It saves a GeoJSON file to the file system for each GEDI .h5 file downloaded from EarthData. This was created to address the issue of GEDI files containing large amounts of unnecessary data when only a specific area of interest is needed. \n", + "\n", + "All [level 1B, 2A, and 2B](https://gedi.umd.edu/data/products/) files will be downloaded and all attributes converted to GeoJSON.\n", + "\n", + "This script works in the Colab environment, but honestly, it may be easier to run locally, assuming some familiarity with running Python from the command line. Because download and processing can take 10-15 minutes per file, runtime disconnect issues may occur if the browser is left unattended. Additionally, all the processed files will need to be saved locally to avoid losing them when running the script a second time.\n", + "\n", + "Potential areas of improvement:\n", + "1. The clipping and processing code was taken from [GEDI Subsetter](https://git.earthdata.nasa.gov/projects/LPDUR/repos/gedi-subsetter/browse), a command line tool for clipping a directory of GEDI h5 files and outputting to GeoJSON. It offers more customizability than this notebook, and more of that customizability could be built into this script.\n", + "2. Some of the logic in the GEDI Subsetter function could be abstracted out into smaller functions or added to a Python package.\n", + "\n", + "TODO - get all the layers instead of just the defaults." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XGhmQ7l2Ln1Q", "outputId": "1d596cca-3ab1-4655-a3c4-a03dc9827b1c" }, "source": [ "!pip install pyGEDI\n", "!pip install geopandas\n", "!pip install rtree\n", "from pyGEDI import *\n", "from datetime import date\n", "from shapely.geometry import Polygon\n", "import json\n", "import geopandas\n", "import os\n", "import h5py\n", "import pandas as pd\n", "import sys\n", "import re  # used by the download loop below; made explicit rather than relying on pyGEDI's star import\n", "import requests  # used by the GEDI Finder call below\n", "import numpy as np" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "text": [ "Collecting pyGEDI\n", " Downloading https://files.pythonhosted.org/packages/d6/6c/f08e65c05fe95cabcf808a095d965c80023c24ed40520c31683ef25b16e8/pyGEDI-0.2.5.tar.gz\n", "Building wheels for collected packages: pyGEDI\n", " Building wheel for pyGEDI (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for pyGEDI: filename=pyGEDI-0.2.5-cp37-none-any.whl size=4146 sha256=1891e3bbe699c147079b6afe54644960e77344e8beaf8ed157a09b0cec0fc877\n", + " Stored in directory: /root/.cache/pip/wheels/82/ac/2b/90d7bedb2f8a4a4dafcd66cd8f3efa9939c268316256dc11fc\n", + "Successfully built pyGEDI\n", + "Installing collected packages: pyGEDI\n", + "Successfully installed pyGEDI-0.2.5\n", + "Collecting geopandas\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d7/bf/e9cefb69d39155d122b6ddca53893b61535fa6ffdad70bf5ef708977f53f/geopandas-0.9.0-py2.py3-none-any.whl (994kB)\n", + "\u001b[K |████████████████████████████████| 1.0MB 9.4MB/s \n", + "\u001b[?25hCollecting pyproj>=2.2.0\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/11/1d/1c54c672c2faf08d28fe78e15d664c048f786225bef95ad87b6c435cf69e/pyproj-3.1.0-cp37-cp37m-manylinux2010_x86_64.whl (6.6MB)\n", + "\u001b[K |████████████████████████████████| 6.6MB 28.0MB/s \n", + "\u001b[?25hCollecting fiona>=1.8\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9c/fc/9807326c37a6bfb2393ae3e1cca147aa74844562c4d5daa782d6e97ad2bc/Fiona-1.8.20-cp37-cp37m-manylinux1_x86_64.whl (15.4MB)\n", + "\u001b[K |████████████████████████████████| 15.4MB 200kB/s \n", + "\u001b[?25hRequirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/dist-packages (from geopandas) (1.1.5)\n", + "Requirement already satisfied: shapely>=1.6 in /usr/local/lib/python3.7/dist-packages (from geopandas) (1.7.1)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from pyproj>=2.2.0->geopandas) (2021.5.30)\n", + "Collecting munch\n", + " Downloading https://files.pythonhosted.org/packages/cc/ab/85d8da5c9a45e072301beb37ad7f833cd344e04c817d97e0cc75681d248f/munch-2.5.0-py2.py3-none-any.whl\n", + "Requirement already satisfied: attrs>=17 in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (21.2.0)\n", + "Collecting click-plugins>=1.0\n", + " Downloading https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl\n", + "Requirement already satisfied: click>=4.0 in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (7.1.2)\n", + "Requirement already satisfied: six>=1.7 in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (1.15.0)\n", + "Collecting cligj>=0.5\n", + " Downloading https://files.pythonhosted.org/packages/73/86/43fa9f15c5b9fb6e82620428827cd3c284aa933431405d1bcf5231ae3d3e/cligj-0.7.2-py3-none-any.whl\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (57.2.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->geopandas) (2.8.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->geopandas) (2018.9)\n", + "Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->geopandas) (1.19.5)\n", + "Installing collected packages: pyproj, munch, click-plugins, cligj, fiona, geopandas\n", + "Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.20 geopandas-0.9.0 munch-2.5.0 pyproj-3.1.0\n", + "Collecting rtree\n", + "\u001b[?25l Downloading 
https://files.pythonhosted.org/packages/51/05/5a67111cee91d2165a2bcb855f442186e3d76ddef834596cc84d4875c401/Rtree-0.9.7-cp37-cp37m-manylinux2010_x86_64.whl (994kB)\n", + "\u001b[K |████████████████████████████████| 1.0MB 8.8MB/s \n", + "\u001b[?25hInstalling collected packages: rtree\n", + "Successfully installed rtree-0.9.7\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "sajPvEvL4Hnh" }, "source": [ "The following functions are taken from [pyGEDI](https://github.com/EduinHSERNA/pyGEDI/blob/master/pyGEDI/fuctions.py) and modified slightly to download one .h5 file at a time, allowing us to clip and process each file before downloading the next one." ] }, { "cell_type": "code", "metadata": { "id": "sdcIWZYRytUp" }, "source": [ "def gediDownload(url, outdir, fileName, session):\n", " \"\"\"Download the GEDI file from EarthData and save it to a directory named GEDI Product/data collection day\n", " :param url: The EarthData download link for the .h5 file\n", " :param outdir: The root directory for the .h5 files\n", " :param fileName: The name to save the .h5 file as, without the extension\n", " :param session: The EarthData session\n", " \"\"\"\n", " print(f\" Begin {fileName} download from EarthData.\")\n", " try:\n", " os.makedirs(outdir)\n", " except OSError:\n", " print (f\" WARNING - Creation of the subdirectory {outdir} failed or already exists\")\n", " else:\n", " print (f\" Created the subdirectory {outdir}\") \n", "\n", " path = outdir + fileName + \".h5\"\n", " \n", " with open(path, 'wb') as f:\n", " response = session.get(url, stream=True)\n", " total = response.headers.get('content-length')\n", " if total is None:\n", " f.write(response.content)\n", " else:\n", " downloaded = 0\n", " total = int(total)\n", " for data in response.iter_content(chunk_size=max(int(total/1000), 1024*1024)):\n", " downloaded += len(data)\n", " f.write(data)\n", " done = int(100*downloaded/total)\n", " gb=float(total/1073741824)\n", "\n", " sys.stdout.write('\\r' + ' ' +url[url.rfind(':')+52:]+' | '+str(gb)[:5]+'GB | '+ str(100*downloaded/total)+ '% [{}{}]'.format('█' * done, '.' * (100 -done)))\n", " sys.stdout.flush()\n", " sys.stdout.write('\\n')\n", " print(f\" {fileName} download complete.\")\n" ], "execution_count": 2, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "vOnE530Z5G2i" }, "source": [ "Earthdata has a tool called [GEDI Finder](https://lpdaacsvc.cr.usgs.gov/services/gedifinder) that returns a list of files for a given AOI. This function will call the GEDI Finder web service and return the list of files to download.\n",
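 + "\n", + "A minimal sketch of the same request this function makes (the bbox here is the example AOI used later in this notebook; the response JSON holds the download links under a 'data' key):\n", + "\n", + "```python\n", + "import requests\n", + "\n", + "url = ('https://lpdaacsvc.cr.usgs.gov/services/gedifinder'\n", + "       '?product=GEDI02_B&version=001&bbox=-13.76913,-44.0654,-13.67646,-44.17246&output=json')\n", + "links = requests.get(url).json().get('data')  # list of .h5 download URLs\n", + "```"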
 ] }, { "cell_type": "code", "metadata": { "id": "NhSV1bZz0Uda" }, "source": [ "def getGediDownloadLinks(product, version, bbox):\n", " \"\"\"Get a list of download links that intersect an AOI from the GEDI Finder web service.\n", " :param product: The GEDI product. Options - 1B, 2A, or 2B\n", " :param version: The GEDI production version. Option - 001\n", + " :param bbox: An area of interest as an array containing the upper left lat, upper left long, lower right lat and lower right long coordinates - \n", + " [ul_lat,ul_lon,lr_lat,lr_lon]\n", + " \"\"\"\n", + " bboxStr = bbox[0] + ',' + bbox[1] + ',' + bbox[2] + ',' + bbox[3]\n", + " url='https://lpdaacsvc.cr.usgs.gov/services/gedifinder?product='+product+'&version='+str(version)+'&bbox='+bboxStr+'&output=json'\n", + " \n", + " print(f\"{product} downloads: {url}\")\n", + "\n", + " content=requests.get(url)\n", + " listh5=content.json().get('data')\n", + " return listh5\n", + " " ], "execution_count": 3, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "paVigP5ttd4f" }, "source": [ "This function was created from the EarthData [GEDI Subsetter](https://lpdaac.usgs.gov/news/release-gedi-subsetter-data-prep-script/) command line tool. It takes in an area of interest and a file path and returns a clipped GeoDataFrame. There are leftover, unused chunks of code in here; I did not remove them, partly out of laziness but also to leave the option open for future modification." ] }, { "cell_type": "code", "metadata": { "id": "11IEjGSOaYwb" }, "source": [ "def gediSubsetter(aoi, inFilePath, layers = None):\n", " \"\"\"Convert a GEDI h5 file to a GeoDataFrame\n", " :param aoi: a string list representing the Area of Interest.\n", " :param inFilePath: The file path of the GEDI h5 file.\n", " :param layers (optional): A list of layers that will be extracted out of the h5 file.\n", " \"\"\" \n", " ROI = [float(aoi[0]), float(aoi[1]), float(aoi[2]), float(aoi[3])]\n", "\n", " try:\n", " ROI = Polygon([(ROI[1], ROI[0]), (ROI[3], ROI[0]), (ROI[3], ROI[2]), (ROI[1], ROI[2])]) \n", " ROI.crs = 'EPSG:4326'\n", " except:\n", " print(' ERROR - unable to read input bounding box coordinates, the required format is: ul_lat,ul_lon,lr_lat,lr_lon')\n", " sys.exit(2)\n", "\n", " # Keep the exact input geometry for the final clip to ROI\n", " finalClip = geopandas.GeoDataFrame([1], geometry=[ROI], crs='EPSG:4326') \n", " \n", " # Move into the input directory\n", " try:\n", " os.chdir(inFilePath)\n", " except FileNotFoundError:\n", " print(' ERROR - input directory (--dir) provided does not exist or was not found')\n", " sys.exit(2)\n", "\n", " beamSubset = ['BEAM0000', 'BEAM0001', 'BEAM0010', 'BEAM0011', 'BEAM0101', 'BEAM0110', 'BEAM1000', 'BEAM1011']\n", " layerSubset = None\n", " \n", " # -------------------------------------SET UP WORKSPACE------------------------------------------ #\n", " # Create list of GEDI HDF-EOS5 files in the directory\n", " gediFiles = [o for o in os.listdir() if o.endswith('.h5') and 'GEDI' in o]\n", "\n", " # --------------------DEFINE PRESET BAND/LAYER SUBSETS ------------------------------------------ #\n", " # Default layers to be subset and exported, see the GEDI Subsetter README for information on how to add additional layers\n",
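 + " # NOTE: the first two entries in each preset below must be the latitude and\n", + " # longitude datasets - the code later locates them via sdsSubset[0] and sdsSubset[1].\n",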
 + " l1bSubset = [ '/geolocation/latitude_bin0', '/geolocation/longitude_bin0', '/channel', '/shot_number',\n", + " '/rxwaveform','/rx_sample_count', '/stale_return_flag', '/tx_sample_count', '/txwaveform',\n", + " '/geolocation/degrade', '/geolocation/delta_time', '/geolocation/digital_elevation_model',\n", + " '/geolocation/solar_elevation', '/geolocation/local_beam_elevation', '/noise_mean_corrected',\n", + " '/geolocation/elevation_bin0', '/geolocation/elevation_lastbin', '/geolocation/surface_type', '/geolocation/digital_elevation_model_srtm']\n", + " l2aSubset = ['/lat_lowestmode', '/lon_lowestmode', '/channel', '/shot_number', '/degrade_flag', '/delta_time', \n", + " '/digital_elevation_model', '/elev_lowestmode', '/quality_flag', '/rh', '/sensitivity', '/digital_elevation_model_srtm', \n", + " '/elevation_bias_flag', '/surface_flag', '/num_detectedmodes', '/selected_algorithm', '/solar_elevation']\n", + " l2bSubset = ['/geolocation/lat_lowestmode', '/geolocation/lon_lowestmode', '/channel', '/geolocation/shot_number',\n", + " '/cover', '/cover_z', '/fhd_normal', '/pai', '/pai_z', '/rhov', '/rhog',\n", + " '/pavd_z', '/l2a_quality_flag', '/l2b_quality_flag', '/rh100', '/sensitivity', \n", + " '/stale_return_flag', '/surface_flag', '/geolocation/degrade_flag', '/geolocation/solar_elevation',\n", + " '/geolocation/delta_time', '/geolocation/digital_elevation_model', '/geolocation/elev_lowestmode']\n", + "\n", + " # -------------------IMPORT GEDI FILES AS GEODATAFRAMES AND CLIP TO ROI-------------------------- # \n", + " # Loop through each GEDI file and export as a point geojson\n", + " l = 0\n", + " for g in gediFiles:\n", + " l += 1\n", + " print(f\" Processing file: {g} ({l}/{len(gediFiles)})\")\n", + " gedi = h5py.File(g, 'r') # Open file\n", + " gediName = g.split('.h5')[0] # Keep original filename\n", + " gedi_objs = [] \n", + " gedi.visit(gedi_objs.append) # Retrieve list of datasets \n", + "\n", + " # Search for relevant SDS inside data file\n", + " gediSDS = [str(o) for o in gedi_objs if isinstance(gedi[o], h5py.Dataset)] \n", + " \n", + " # Define subset of layers based on product. If layers param comes in as null, use the default layers defined above. \n", + " if layers == None:\n", + " if 'GEDI01_B' in g:\n", + " sdsSubset = l1bSubset\n", + " elif 'GEDI02_A' in g:\n", + " sdsSubset = l2aSubset \n", + " else:\n", + " sdsSubset = l2bSubset\n", + " else:\n", + " sdsSubset = layers\n", + " \n", + " # Append additional datasets if provided\n", + " if layerSubset is not None:\n", + " [sdsSubset.append(y) for y in layerSubset]\n", + "\n", + " # Subset to the selected datasets\n", + " gediSDS = [c for c in gediSDS if any(c.endswith(d) for d in sdsSubset)]\n", + " \n", + " # Get unique list of beams and subset to user-defined subset or default (all beams)\n", + " beams = []\n", + " for h in gediSDS:\n", + " beam = h.split('/', 1)[0]\n", + " if beam not in beams and beam in beamSubset:\n", + " beams.append(beam)\n", + "\n", + " gediDF = pd.DataFrame() # Create empty dataframe to store GEDI datasets \n", + " del beam, gedi_objs, h\n", + " \n", + " # Loop through each beam and create a geodataframe with lat/lon for each shot, then clip to ROI\n", + " for b in beams:\n", + " beamSDS = [s for s in gediSDS if b in s]\n", + " \n", + " # Search for latitude, longitude, and shot number SDS\n", + " lat = [l for l in beamSDS if sdsSubset[0] in l][0] \n", + " lon = [l for l in beamSDS if sdsSubset[1] in l][0]\n", + " shot = f'{b}/shot_number' \n", + " \n", + " # Open latitude, longitude, and shot number SDS\n", + " shots = gedi[shot][()]\n", + " lats = gedi[lat][()]\n", + " lons = gedi[lon][()]\n", + " \n", + " # Append BEAM, shot number, latitude, longitude and an index to the GEDI dataframe\n", + " geoDF = pd.DataFrame({'BEAM': len(shots) * [b], shot.split('/', 1)[-1].replace('/', '_'): shots,\n", + " 'Latitude':lats, 'Longitude':lons, 'index': np.arange(0, len(shots), 1)})\n", + " \n", + " # Convert lat/lon coordinates to shapely points and append to geodataframe\n", + " geoDF = geopandas.GeoDataFrame(geoDF, geometry=geopandas.points_from_xy(geoDF.Longitude, geoDF.Latitude))\n", + " \n",
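 + " # NOTE: this is only a coarse bounding-box pre-filter; the exact input geometry\n", + " # is applied at the very end via geopandas.overlay(outDF, finalClip) (identical here,\n", + " # since the AOI is a box, but kept from GEDI Subsetter for arbitrary ROIs).\n",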
 + " # Clip to only include points within the user-defined bounding box\n", + " geoDF = geoDF[geoDF['geometry'].within(ROI.envelope)] \n", + " gediDF = gediDF.append(geoDF)\n", + " del geoDF\n", + " \n", + " # Convert to geodataframe and add crs\n", + " gediDF = geopandas.GeoDataFrame(gediDF)\n", + " gediDF.crs = 'EPSG:4326'\n", + " \n", + " if gediDF.shape[0] == 0:\n", + " print(f\" WARNING - No intersecting shots were found between {g} and the region of interest submitted.\")\n", + " continue\n", + " del lats, lons, shots\n", + " \n", + " # --------------------------------OPEN SDS AND APPEND TO GEODATAFRAME---------------------------- #\n", + " beamsDF = pd.DataFrame() # Create dataframe to store SDS\n", + " j = 0\n", + " \n", + " # Loop through each beam and extract subset of defined SDS\n", + " for b in beams:\n", + " beamDF = pd.DataFrame()\n", + " beamSDS = [s for s in gediSDS if b in s and not any(s.endswith(d) for d in sdsSubset[0:3])]\n", + " shot = f'{b}/shot_number'\n", + " \n", + " try:\n", + " # set up indexes in order to retrieve SDS data only within the clipped subset from above\n", + " mindex = min(gediDF[gediDF['BEAM'] == b]['index'])\n", + " maxdex = max(gediDF[gediDF['BEAM'] == b]['index']) + 1\n", + " shots = gedi[shot][mindex:maxdex]\n", + " except ValueError:\n", + " print(f\" WARNING - No intersecting shots found for {b}\")\n", + " continue\n", + " # Loop through and extract each SDS subset and add to DF\n", + " for s in beamSDS:\n", + " j += 1\n", + " sName = s.split('/', 1)[-1].replace('/', '_')\n", + "\n", + " # Datasets with consistent structure as shots\n", + " if gedi[s].shape == gedi[shot].shape:\n", + " beamDF[sName] = gedi[s][mindex:maxdex] # Subset by index\n", + " \n", + " # Datasets with a length of one \n", + " elif len(gedi[s][()]) == 1:\n", + " beamDF[sName] = [gedi[s][()][0]] * len(shots) # create array of same single value\n", + " \n", + " # Multidimensional datasets\n", + " elif len(gedi[s].shape) == 2 and 'surface_type' not in s: \n", + " allData = gedi[s][()][mindex:maxdex]\n", + " \n", + " # For each additional dimension, create a new output column to store those data\n", + " for i in range(gedi[s].shape[1]):\n", + " step = []\n", + " for a in allData:\n", + " step.append(a[i])\n", + " beamDF[f\"{sName}_{i}\"] = step\n", + " \n", + " # Waveforms\n", + " elif s.endswith('waveform') or s.endswith('pgap_theta_z'):\n", + " waveform = []\n", + " \n", + " if s.endswith('waveform'):\n", + " # Use sample_count and sample_start_index to identify the location of each waveform\n", + " start = gedi[f'{b}/{s.split(\"/\")[-1][:2]}_sample_start_index'][mindex:maxdex]\n", + " count = gedi[f'{b}/{s.split(\"/\")[-1][:2]}_sample_count'][mindex:maxdex]\n", + " \n", + " # for pgap_theta_z, use rx sample start index and count to subset\n", + " else:\n", + " # Use sample_count and sample_start_index to identify the location of each waveform\n", + " start = gedi[f'{b}/rx_sample_start_index'][mindex:maxdex]\n", + " count = gedi[f'{b}/rx_sample_count'][mindex:maxdex]\n", + " wave = gedi[s][()]\n", + " \n", + " # in the dataframe, each waveform will be stored as a list of values\n", + " # (sample_start_index is 1-based, hence the start[k] - 1 below)\n", + " for k in range(len(start)):\n", + " singleWF = wave[int(start[k] - 1): int(start[k] - 1 + count[k])]\n", + " waveform.append(','.join([str(q) for q in singleWF]))\n", + " beamDF[sName] = waveform\n", + " \n", + " # Surface type \n", + " elif s.endswith('surface_type'):\n",
 + " surfaces = ['land', 'ocean', 'sea_ice', 'land_ice', 'inland_water']\n", + " allData = gedi[s][()]\n", + " for i in range(gedi[s].shape[0]):\n", + " beamDF[f'{surfaces[i]}'] = allData[i][mindex:maxdex]\n", + " del allData\n", + " else:\n", + " print(f\" SDS: {s} not found\")\n", + " \n", + " beamsDF = beamsDF.append(beamDF)\n", + " del beamDF, beamSDS, beams, gedi, gediSDS, shots, sdsSubset\n", + " \n", + " # Combine geolocation dataframe with SDS layer dataframe\n", + " outDF = pd.merge(gediDF, beamsDF, left_on='shot_number', right_on=[sn for sn in beamsDF.columns if sn.endswith('shot_number')][0])\n", + " outDF.index = outDF['index']\n", + " del gediDF, beamsDF \n", + " \n", + " # Subset the output DF to the actual boundary of the input ROI\n", + " outDF = geopandas.overlay(outDF, finalClip)\n", + " del outDF[0] # drop the placeholder column created by finalClip\n", + "\n", + " return outDF" ], "execution_count": 4, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "7NjH3C6Gzu1x" }, "source": [ "Set up an EarthData session and the area of interest bounding box. This is the only block that contains hard-coded values that you may need to change.\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "Xa_-B4im2eQq" }, "source": [ "username=\"\"\n", "password=\"\"\n", "session=sessionNASA(username,password)\n", "rootDirectory = \"data\"\n", "isColabEnvironment = True\n", "\n", "#The list of GEDI products\n", "product_1B='GEDI01_B'\n", "product_2A='GEDI02_A'\n", "product_2B='GEDI02_B'\n", "\n", "#The GEDI product version\n", "version='001'\n", "\n", "#The Area of Interest\n", "ul_lat= '-13.76913'\n", "ul_lon= '-44.0654'\n", "lr_lat= '-13.67646'\n", "lr_lon= '-44.17246'\n", "\n", "bbox=[ul_lat, ul_lon, lr_lat, lr_lon]" ], "execution_count": 5, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "tL_JChlpVIaN" }, "source": [ "Get the download links for the GEDI files that intersect with the area of interest. Following a link will prompt a sign-in to EarthData, after which the download will begin.\n",
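 + "\n", + "If you only want a quick test run, the combined list built in the next cell can be filtered before the download loop - a hypothetical example:\n", + "\n", + "```python\n", + "# keep only the first two Level 2B granules\n", + "testList = [u for u in downloadList if 'GEDI02_B' in u][:2]\n", + "```"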
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Heb6kMG6-cTO", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "99eaf683-8707-4384-c377-42a84e303c67" + }, + "source": [ + "downloadList2B = getGediDownloadLinks(product_2B,version,bbox)\n", + "downloadList2A = getGediDownloadLinks(product_2A,version,bbox)\n", + "downloadList1B = getGediDownloadLinks(product_1B,version,bbox)\n", + "\n", + "downloadList = downloadList2B + downloadList2A + downloadList1B\n", + "downloadList" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "GEDI02_B downloads: https://lpdaacsvc.cr.usgs.gov/services/gedifinder?product=GEDI02_B&version=001&bbox=-13.76913,-44.0654,-13.67646,-44.17246&output=json\n", + "GEDI02_A downloads: https://lpdaacsvc.cr.usgs.gov/services/gedifinder?product=GEDI02_A&version=001&bbox=-13.76913,-44.0654,-13.67646,-44.17246&output=json\n", + "GEDI01_B downloads: https://lpdaacsvc.cr.usgs.gov/services/gedifinder?product=GEDI01_B&version=001&bbox=-13.76913,-44.0654,-13.67646,-44.17246&output=json\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2020.07.20/GEDI02_B_2020202065047_O09083_T05345_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2020.04.21/GEDI02_B_2020112181526_O07695_T03922_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2020.04.17/GEDI02_B_2020108194843_O07634_T02652_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2020.01.10/GEDI02_B_2020010104413_O06109_T02652_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2019.11.10/GEDI02_B_2019314105245_O05163_T01076_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2019.10.14/GEDI02_B_2019287090415_O04743_T02491_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2019.08.30/GEDI02_B_2019242025913_O04041_T01068_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2019.07.17/GEDI02_B_2019198085218_O03362_T03922_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2019.05.27/GEDI02_B_2019147050950_O02568_T05345_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_B.001/2019.04.18/GEDI02_B_2019108080338_O01964_T05337_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2020.07.20/GEDI02_A_2020202065047_O09083_T05345_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2020.04.21/GEDI02_A_2020112181526_O07695_T03922_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2020.04.17/GEDI02_A_2020108194843_O07634_T02652_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2020.01.10/GEDI02_A_2020010104413_O06109_T02652_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2019.11.10/GEDI02_A_2019314105245_O05163_T01076_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2019.10.14/GEDI02_A_2019287090415_O04743_T02491_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2019.08.30/GEDI02_A_2019242025913_O04041_T01068_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2019.07.17/GEDI02_A_2019198085218_O03362_T03922_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2019.05.27/GEDI02_A_2019147050950_O02568_T05345_02_001_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI02_A.001/2019.04.18/GEDI02_A_2019108080338_O01964_T05337_02_001_01.h5',\n", + " 
'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2020.07.20/GEDI01_B_2020202065047_O09083_T05345_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2020.04.21/GEDI01_B_2020112181526_O07695_T03922_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2020.04.17/GEDI01_B_2020108194843_O07634_T02652_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2020.01.10/GEDI01_B_2020010104413_O06109_T02652_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2019.11.10/GEDI01_B_2019314105245_O05163_T01076_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2019.10.14/GEDI01_B_2019287090415_O04743_T02491_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2019.08.30/GEDI01_B_2019242025913_O04041_T01068_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2019.07.17/GEDI01_B_2019198085218_O03362_T03922_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2019.05.27/GEDI01_B_2019147050950_O02568_T05345_02_003_01.h5',\n", + " 'https://e4ftl01.cr.usgs.gov/GEDI/GEDI01_B.001/2019.04.18/GEDI01_B_2019108080338_O01964_T05337_02_003_01.h5']" ] }, "metadata": { "tags": [] }, "execution_count": 6 } ] }, { "cell_type": "markdown", "metadata": { "id": "nCwbLmRI3irQ" }, "source": [ "The download-and-process loop works as follows.\n", "\n", "For each file we need to download:\n", "1. Get the path the file will be downloaded to.\n", "2. Download the file from EarthData (skipping the download if the file already exists on disk).\n", "3. Determine the list of layers we need to extract from the h5 file.\n", "4. Using the gediSubsetter function, clip the h5 file and convert the data to GeoJSON.\n", "5. Delete the raw h5 file. " ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7acjvrpo23GA", "outputId": "5321f655-5e3c-455c-a048-1b8fe61780ed" }, "source": [ "count = 1\n", "# origList = downloadList\n", "# downloadList = [downloadList[0], downloadList[1], downloadList[11], downloadList[12], downloadList[21], downloadList[22]]\n", "# downloadList = [downloadList[15],downloadList[15]]\n", "# for url in downloadList:\n", "for url in [downloadList[5],downloadList[6],downloadList[15],downloadList[16],downloadList[25],downloadList[26]]:\n", " #In the colab environment, the folders end up being nested. We need to cd into the root directory after each iteration.\n", " if(isColabEnvironment):\n", " os.chdir('/content/')\n", " \n", " #Parse the file name, collection date, and product from the download URL.\n", " fileNameh5 = re.search(\"GEDI\\d{2}_\\D_.*\", url).group(0).replace(\".h5\", \"\") # regex matches GEDI{01 or 02}_{A or B}_.*\n", " day = re.search(\"\\d{4}\\.\\d{2}\\.\\d{2}\", url).group(0) # regex matches date formatted 'yyyy.mm.dd'\n", " outdir = rootDirectory + os.sep + re.search(\"GEDI\\d{2}_\\D\\.\\d{3}\", url).group(0) + os.sep + day + os.sep #regex matches GEDI{01 or 02}_{A or B}.001\n", " product = re.search(\"GEDI\\d{2}_\\D\", url).group(0)\n", " filePathH5 = outdir + fileNameh5 + \".h5\"\n", "\n", " print(f\"BEGIN DOWNLOAD AND PROCESSING {fileNameh5}. FILE {count} OF {str(len(downloadList))}.\")\n", "\n", " #If the file exists in the filesystem, skip the download. \n", " if not os.path.isfile(filePathH5):\n", " gediDownload(url, outdir, fileNameh5, session)\n", " else:\n", " print(f\" File {fileNameh5} exists in file system. Skipping download.\")\n", " \n", " #Set up the layers to extract from GEDI file\n", " h5_2B=getH5(filePathH5)\n", "\n", " #getLayer returns a dictionary with the file name as the key and the list of layers as the value\n", " layers = getLayer('',[h5_2B])[filePathH5] \n", " \n", " #The subsetter function needs the coordinate layers to be the first two layers in the list.\n", " if product == 'GEDI02_B':\n", " layers.insert(0, layers.pop(layers.index('geolocation/lat_lowestmode')))\n", " layers.insert(1, layers.pop(layers.index('geolocation/lon_lowestmode')))\n", " elif product == 'GEDI01_B':\n", " layers.insert(0, layers.pop(layers.index('geolocation/latitude_bin0')))\n", " layers.insert(1, layers.pop(layers.index('geolocation/longitude_bin0')))\n", " else: #product = 'GEDI02_A'\n", " layers.insert(0, layers.pop(layers.index('lat_lowestmode')))\n", " layers.insert(1, layers.pop(layers.index('lon_lowestmode')))\n", "\n",
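 + " #NOTE: as written, the active call below uses the default layers, so the reordered\n", + " #'layers' list is only needed if you switch to the commented-out call with defined layers.\n",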
Skipping download.\")\n", + " \n", + " #Set up the layers to extract from GEDI file\n", + " h5_2B=getH5(filePathH5)\n", + "\n", + " #This returns a dictionary with {file name: [comma separated layers]} file name as the key and list of layers as the value\n", + " layers = getLayer('',[h5_2B])[filePathH5] \n", + " \n", + " #The subsetter function needs the coordinate layers to be the first two layers in the list.\n", + " if product == 'GEDI02_B':\n", + " layers.insert(0, layers.pop(layers.index('geolocation/lat_lowestmode')))\n", + " layers.insert(1, layers.pop(layers.index('geolocation/lon_lowestmode')))\n", + " elif product == 'GEDI01_B':\n", + " layers.insert(0, layers.pop(layers.index('geolocation/latitude_bin0')))\n", + " layers.insert(1, layers.pop(layers.index('geolocation/longitude_bin0')))\n", + " else: #product = 'GEDI02_A'\n", + " layers.insert(0, layers.pop(layers.index('lat_lowestmode')))\n", + " layers.insert(1, layers.pop(layers.index('lon_lowestmode')))\n", + "\n", + " # geodataframe = gediSubsetter(bbox, outdir, layers) #Call to the subsetter with defined layers.\n", + " geodataframe = gediSubsetter(bbox, outdir) #Call to the subsetter which will use the default laters.\n", + "\n", + " #Add columns to the dataframe\n", + " geodataframe['gedi_product'] = product\n", + " geodataframe['gefi_file_name'] = fileNameh5\n", + " geodataframe['data_collected_date'] = day\n", + " geodataframe['accessed_date'] = str(date.today())\n", + "\n", + " print(\" Sample data from geodataframe:\")\n", + " print(geodataframe.head())\n", + " \n", + " #Convert to GeoJSON\n", + " print(\" Converting geodataframe to json.\")\n", + " geodataframe.to_file(fileNameh5 + '.json', driver=\"GeoJSON\")\n", + " \n", + " print(f\"FINISHED PROCESSING {fileNameh5}.\")\n", + " print(\"------------------------------------\")\n", + " count += 1\n", + " \n", + " #In the colab environment, the folders end up being nested. We need to cd into the root directory after each iteration.\n", + " if(isColabEnvironment):\n", + " os.chdir('/content/')\n", + " #Remove the raw .h5 file from the file system. \n", + " try:\n", + " os.remove(filePathH5)\n", + " except FileNotFoundError:\n", + " print(f\" WARNING - h5 file failed to be removed.\")\n", + " continue" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "BEGIN DOWNLOAD AND PROCESSING GEDI02_B_2019287090415_O04743_T02491_02_001_01. FILE 1 OF 30.\n", + " Begin GEDI02_B_2019287090415_O04743_T02491_02_001_01 download from EarthData.\n", + " Created the subdirectory data/GEDI02_B.001/2019.10.14/\n", + " GEDI02_B_2019287090415_O04743_T02491_02_001_01.h5 | 1.874GB | 100.0% [████████████████████████████████████████████████████████████████████████████████████████████████████]\n", + " GEDI02_B_2019287090415_O04743_T02491_02_001_01 download complete.\n", + " Processing file: GEDI02_B_2019287090415_O04743_T02491_02_001_01.h5 (1/1)\n", + " Sample data from geodataframe:\n", + " BEAM shot_number ... data_collected_date accessed_date\n", + "0 BEAM0000 47430016100109663 ... 2019.10.14 2021-07-17\n", + "1 BEAM0000 47430016300109664 ... 2019.10.14 2021-07-17\n", + "2 BEAM0000 47430016500109665 ... 2019.10.14 2021-07-17\n", + "3 BEAM0000 47430016700109666 ... 2019.10.14 2021-07-17\n", + "4 BEAM0000 47430016900109667 ... 
2019.10.14 2021-07-17\n", + "\n", + "[5 rows x 117 columns]\n", + " Converting geodataframe to json.\n", + "FINISHED PROCESSING GEDI02_B_2019287090415_O04743_T02491_02_001_01.\n", + "------------------------------------\n", + "BEGIN DOWNLOAD AND PROCESSING GEDI02_B_2019242025913_O04041_T01068_02_001_01. FILE 2 OF 30.\n", + " Begin GEDI02_B_2019242025913_O04041_T01068_02_001_01 download from EarthData.\n", + " Created the subdirectory data/GEDI02_B.001/2019.08.30/\n", + " GEDI02_B_2019242025913_O04041_T01068_02_001_01.h5 | 1.835GB | 100.0% [████████████████████████████████████████████████████████████████████████████████████████████████████]\n", + " GEDI02_B_2019242025913_O04041_T01068_02_001_01 download complete.\n", + " Processing file: GEDI02_B_2019242025913_O04041_T01068_02_001_01.h5 (1/1)\n", + " WARNING - No intersecting shots found for BEAM1011\n", + " Sample data from geodataframe:\n", + " BEAM shot_number ... data_collected_date accessed_date\n", + "0 BEAM0000 40410009900109930 ... 2019.08.30 2021-07-17\n", + "1 BEAM0000 40410010100109931 ... 2019.08.30 2021-07-17\n", + "2 BEAM0000 40410010300109932 ... 2019.08.30 2021-07-17\n", + "3 BEAM0000 40410010500109933 ... 2019.08.30 2021-07-17\n", + "4 BEAM0000 40410010700109934 ... 2019.08.30 2021-07-17\n", + "\n", + "[5 rows x 117 columns]\n", + " Converting geodataframe to json.\n", + "FINISHED PROCESSING GEDI02_B_2019242025913_O04041_T01068_02_001_01.\n", + "------------------------------------\n", + "BEGIN DOWNLOAD AND PROCESSING GEDI02_A_2019287090415_O04743_T02491_02_001_01. FILE 3 OF 30.\n", + " Begin GEDI02_A_2019287090415_O04743_T02491_02_001_01 download from EarthData.\n", + " Created the subdirectory data/GEDI02_A.001/2019.10.14/\n", + " GEDI02_A_2019287090415_O04743_T02491_02_001_01.h5 | 8.629GB | 100.0% [████████████████████████████████████████████████████████████████████████████████████████████████████]\n", + " GEDI02_A_2019287090415_O04743_T02491_02_001_01 download complete.\n", + " Processing file: GEDI02_A_2019287090415_O04743_T02491_02_001_01.h5 (1/1)\n", + " Sample data from geodataframe:\n", + " BEAM shot_number ... data_collected_date accessed_date\n", + "0 BEAM0000 47430016100109663 ... 2019.10.14 2021-07-17\n", + "1 BEAM0000 47430016300109664 ... 2019.10.14 2021-07-17\n", + "2 BEAM0000 47430016500109665 ... 2019.10.14 2021-07-17\n", + "3 BEAM0000 47430016700109666 ... 2019.10.14 2021-07-17\n", + "4 BEAM0000 47430016900109667 ... 2019.10.14 2021-07-17\n", + "\n", + "[5 rows x 123 columns]\n", + " Converting geodataframe to json.\n", + "FINISHED PROCESSING GEDI02_A_2019287090415_O04743_T02491_02_001_01.\n", + "------------------------------------\n", + "BEGIN DOWNLOAD AND PROCESSING GEDI02_A_2019242025913_O04041_T01068_02_001_01. FILE 4 OF 30.\n", + " Begin GEDI02_A_2019242025913_O04041_T01068_02_001_01 download from EarthData.\n", + " Created the subdirectory data/GEDI02_A.001/2019.08.30/\n", + " GEDI02_A_2019242025913_O04041_T01068_02_001_01.h5 | 8.563GB | 100.0% [████████████████████████████████████████████████████████████████████████████████████████████████████]\n", + " GEDI02_A_2019242025913_O04041_T01068_02_001_01 download complete.\n", + " Processing file: GEDI02_A_2019242025913_O04041_T01068_02_001_01.h5 (1/1)\n", + " WARNING - No intersecting shots found for BEAM1011\n", + " Sample data from geodataframe:\n", + " BEAM shot_number ... data_collected_date accessed_date\n", + "0 BEAM0000 40410009900109930 ... 2019.08.30 2021-07-17\n", + "1 BEAM0000 40410010100109931 ... 
2019.08.30 2021-07-17\n", + "2 BEAM0000 40410010300109932 ... 2019.08.30 2021-07-17\n", + "3 BEAM0000 40410010500109933 ... 2019.08.30 2021-07-17\n", + "4 BEAM0000 40410010700109934 ... 2019.08.30 2021-07-17\n", + "\n", + "[5 rows x 123 columns]\n", + " Converting geodataframe to json.\n", + "FINISHED PROCESSING GEDI02_A_2019242025913_O04041_T01068_02_001_01.\n", + "------------------------------------\n", + "BEGIN DOWNLOAD AND PROCESSING GEDI01_B_2019287090415_O04743_T02491_02_003_01. FILE 5 OF 30.\n", + " Begin GEDI01_B_2019287090415_O04743_T02491_02_003_01 download from EarthData.\n", + " Created the subdirectory data/GEDI01_B.001/2019.10.14/\n", + " GEDI01_B_2019287090415_O04743_T02491_02_003_01.h5 | 13.40GB | 100.0% [████████████████████████████████████████████████████████████████████████████████████████████████████]\n", + " GEDI01_B_2019287090415_O04743_T02491_02_003_01 download complete.\n", + " Processing file: GEDI01_B_2019287090415_O04743_T02491_02_003_01.h5 (1/1)\n", + " Sample data from geodataframe:\n", + " BEAM shot_number ... data_collected_date accessed_date\n", + "0 BEAM0000 47430016100109663 ... 2019.10.14 2021-07-17\n", + "1 BEAM0000 47430016300109664 ... 2019.10.14 2021-07-17\n", + "2 BEAM0000 47430016500109665 ... 2019.10.14 2021-07-17\n", + "3 BEAM0000 47430016700109666 ... 2019.10.14 2021-07-17\n", + "4 BEAM0000 47430016900109667 ... 2019.10.14 2021-07-17\n", + "\n", + "[5 rows x 28 columns]\n", + " Converting geodataframe to json.\n", + "FINISHED PROCESSING GEDI01_B_2019287090415_O04743_T02491_02_003_01.\n", + "------------------------------------\n", + "BEGIN DOWNLOAD AND PROCESSING GEDI01_B_2019242025913_O04041_T01068_02_003_01. FILE 6 OF 30.\n", + " Begin GEDI01_B_2019242025913_O04041_T01068_02_003_01 download from EarthData.\n", + " Created the subdirectory data/GEDI01_B.001/2019.08.30/\n", + " GEDI01_B_2019242025913_O04041_T01068_02_003_01.h5 | 13.63GB | 100.0% [████████████████████████████████████████████████████████████████████████████████████████████████████]\n", + " GEDI01_B_2019242025913_O04041_T01068_02_003_01 download complete.\n", + " Processing file: GEDI01_B_2019242025913_O04041_T01068_02_003_01.h5 (1/1)\n", + " WARNING - No intersecting shots found for BEAM1011\n", + " Sample data from geodataframe:\n", + " BEAM shot_number ... data_collected_date accessed_date\n", + "0 BEAM0000 40410009900109930 ... 2019.08.30 2021-07-17\n", + "1 BEAM0000 40410010100109931 ... 2019.08.30 2021-07-17\n", + "2 BEAM0000 40410010300109932 ... 2019.08.30 2021-07-17\n", + "3 BEAM0000 40410010500109933 ... 2019.08.30 2021-07-17\n", + "4 BEAM0000 40410010700109934 ... 2019.08.30 2021-07-17\n", + "\n", + "[5 rows x 28 columns]\n", + " Converting geodataframe to json.\n", + "FINISHED PROCESSING GEDI01_B_2019242025913_O04041_T01068_02_003_01.\n", + "------------------------------------\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "A8_wnR8OrpkU" }, "source": [ "Combine the GeoJSON files from each product into a single file.\n", "\n", "All files share the same 'type' and 'crs' attributes. The 'features' attribute holds the data.\n", + "Example structure:\n", + "\n", + "```\n", + "{\n", + " \"type\": \"FeatureCollection\",\n", + " \"crs\": {\n", + " \"type\": \"name\",\n", + " \"properties\": {\n", + " \"name\": \"urn:ogc:def:crs:OGC:1.3:CRS84\"\n", + " }\n", + " },\n", + " \"features\": [{\n", + " \"type\": \"Feature\",\n", + " \"properties\": {\n", + " \"BEAM\": \"0\",\n", + " \"shot_number_x\": 90830000500534134\n", + " },\n", + " \"geometry\": {\n", + " \"type\": \"Point\",\n", + " \"coordinates\": [\n", + " -44.17234047170119,\n", + " -13.743702059659597\n", + " ]\n", + " }\n", + " }]\n", + "}\n", + "```\n",
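 + "\n", + "As a quick sanity check, the combined file can be read back with geopandas (a minimal sketch; the file name matches the combinedJsonFile written below):\n", + "\n", + "```python\n", + "import geopandas\n", + "gdf = geopandas.read_file('GEDI02_B.001.json')  # after the combine step has run\n", + "print(len(gdf), gdf.crs)\n", + "```\n",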
 + "\n" ] }, { "cell_type": "code", "metadata": { "id": "ammwY1mKrwCV", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9394d91c-262d-4d9c-e544-8faa1522f858" }, "source": [ "headerType = \"\"\n", "headerCRS = \"\"\n", "\n", "products = [product_1B + '.' + version, product_2A + '.' + version, product_2B + '.' + version]\n", "\n", "for p in products:\n", " features = []\n", " print(f\"combining {p} json files from /content/data/{p}\")\n", " for root, dirs, files in os.walk(f\"/content/data/{p}\"):\n", " for file in files:\n", " if file.endswith(\".json\"):\n", " print(\" file: \" + os.path.join(root, file))\n", " with open(os.path.join(root, file)) as f:\n", " jsonFile = json.load(f)\n", " headerType = jsonFile[\"type\"]\n", " headerCRS = jsonFile[\"crs\"]\n", " dataOne=jsonFile[\"features\"] #this is a list\n", " features = features + dataOne\n", " \n", " print(f\"after getting the data. product: {p}. \") \n", " jsonDict = {\n", " \"type\": headerType,\n", " \"crs\": headerCRS,\n", " \"features\": features\n", " }\n", "\n", " #If the features attribute is empty, there were no json files found.\n", " if jsonDict[\"features\"] == []: \n", " print(\" no data found\")\n", " continue\n", "\n", " combinedJsonFile = p + \".json\"\n", " with open(combinedJsonFile, 'w') as json_file:\n", " json.dump(jsonDict, json_file)\n", "\n", " print(\" Complete\")" ], "execution_count": 8, "outputs": [ { "output_type": "stream", "text": [ "combining GEDI01_B.001 json files from /content/data/GEDI01_B.001\n", " file: /content/data/GEDI01_B.001/2019.10.14/GEDI01_B_2019287090415_O04743_T02491_02_003_01.json\n", " file: /content/data/GEDI01_B.001/2019.08.30/GEDI01_B_2019242025913_O04041_T01068_02_003_01.json\n", "after getting the data. product: GEDI01_B.001. \n", " Complete\n", "combining GEDI02_A.001 json files from /content/data/GEDI02_A.001\n", " file: /content/data/GEDI02_A.001/2019.10.14/GEDI02_A_2019287090415_O04743_T02491_02_001_01.json\n", " file: /content/data/GEDI02_A.001/2019.08.30/GEDI02_A_2019242025913_O04041_T01068_02_001_01.json\n", "after getting the data. product: GEDI02_A.001. \n", " Complete\n", "combining GEDI02_B.001 json files from /content/data/GEDI02_B.001\n", " file: /content/data/GEDI02_B.001/2019.10.14/GEDI02_B_2019287090415_O04743_T02491_02_001_01.json\n", " file: /content/data/GEDI02_B.001/2019.08.30/GEDI02_B_2019242025913_O04041_T01068_02_001_01.json\n", "after getting the data. product: GEDI02_B.001. \n", " Complete\n" ], "name": "stdout" } ] } ] +} \ No newline at end of file