From c42e3f4421bcfa9e83b4a65409bb632c34ab92a9 Mon Sep 17 00:00:00 2001 From: kelle Date: Tue, 27 Jan 2026 17:54:46 -0500 Subject: [PATCH 1/7] re-org of ingesting doc pages --- docs/index.rst | 2 +- ...etting_started_ingesting.rst => index.rst} | 6 +- docs/pages/ingesting/ingest_scripts.rst | 66 --------- docs/pages/ingesting/ingest_scripts/index.rst | 126 ++++++++++++++++++ .../ingest_scripts/writing_scripts.rst | 120 +++++++++++++++++ 5 files changed, 250 insertions(+), 70 deletions(-) rename docs/pages/ingesting/{getting_started_ingesting.rst => index.rst} (89%) delete mode 100644 docs/pages/ingesting/ingest_scripts.rst create mode 100644 docs/pages/ingesting/ingest_scripts/index.rst create mode 100644 docs/pages/ingesting/ingest_scripts/writing_scripts.rst diff --git a/docs/index.rst b/docs/index.rst index 4ba93df..ee35e28 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,7 +22,7 @@ User Guide Installing astrodb-utils pages/loading/index pages/querying_existing_db/index - pages/ingesting/getting_started_ingesting + pages/ingesting/index pages/modifying/index pages/make_new_db/index pages/template_repo/index diff --git a/docs/pages/ingesting/getting_started_ingesting.rst b/docs/pages/ingesting/index.rst similarity index 89% rename from docs/pages/ingesting/getting_started_ingesting.rst rename to docs/pages/ingesting/index.rst index 2b61d48..099c862 100644 --- a/docs/pages/ingesting/getting_started_ingesting.rst +++ b/docs/pages/ingesting/index.rst @@ -6,10 +6,10 @@ Ingesting and Modifying Data :maxdepth: 2 :titlesonly: + ingest_scripts/index ingesting_publications spectra/* - ingest_scripts - + .. 
note:: @@ -26,4 +26,4 @@ API Documentation Function to ingest alternative name data :py:mod:`astrodb_utils.utils.ingest_instrument` - Function to ingest instrument data \ No newline at end of file + Function to ingest instrument data diff --git a/docs/pages/ingesting/ingest_scripts.rst b/docs/pages/ingesting/ingest_scripts.rst deleted file mode 100644 index b788950..0000000 --- a/docs/pages/ingesting/ingest_scripts.rst +++ /dev/null @@ -1,66 +0,0 @@ -Ingest Scripts -============== -Ingest scripts can be used to add a bunch of data to the database at once. -Often ingests are performed by reading in a file (e.g., csv) that contains -a table of data and then ingesting each row of the table into the database. -Below is an example script for ingesting sources discovered by -Rojas et al. 2012 into the SIMPLE Archive from a .csv file -that has columns named `name`, `ra`, `dec`. - -.. code-block:: python - - from astropy.io import ascii - from simple.schema import REFERENCE_TABLES - from astrodb_utils import load_astrodb, logger, AstroDBError - from astrodb_utils.sources import ingest_source - from astrodb_utils.publications import ingest_publication - - SAVE_DB = False # Set to True to write out the JSON files at the end of the script - RECREATE_DB = True # Set to True to recreate the database from the JSON files - - # Load the database - db = load_astrodb("SIMPLE.sqlite", - recreatedb=RECREATE_DB, - reference_tables=REFERENCE_TABLES, - felis_schema="simple/schema.yaml", - ) - - - def ingest_pubs(db): - # Ingest discovery publication - ingest_publication( - db, - doi="10.1088/0004-637X/748/2/93" - ) - - def ingest_sources(db): - # read the csv data into an astropy table - data_table = ascii.read(file.csv, format="csv") - - n_added = 0 - n_skipped = 0 - - for source in data_table: - try: - ingest_source( - db, - source=data_table['name'], - ra=data_table['ra'], - dec=data_table['dec'], - reference="Roja12", - ) - logger.info(f"Source {source['name']} ingested.") - n_added 
+= 1 - except AstroDBError as e: - logger.warning(f"Error ingesting source {source['name']}: {e}") - n_skipped += 1 - continue - - - ingest_pubs(db) - ingest_sources(db) - - logger.info(f"Added {n_added} sources, skipped {n_skipped} sources.") - - if DB_SAVE: - db.save() diff --git a/docs/pages/ingesting/ingest_scripts/index.rst b/docs/pages/ingesting/ingest_scripts/index.rst new file mode 100644 index 0000000..eeab737 --- /dev/null +++ b/docs/pages/ingesting/ingest_scripts/index.rst @@ -0,0 +1,126 @@ +Ingest Scripts +============== + +.. toctree:: + :glob: + :maxdepth: 1 + + writing_scripts + +Ingest scripts can be used to add a bunch of data to the database at once. +Often ingests are performed by reading in a file (e.g., csv) that contains +a table of data and then ingesting each row of the table into the database. + + +Loading the Database +-------------------- + +.. code-block:: python + + from astrodb-utils import build_db_from_json + + db = build_db_from_json(settings_file = "path/to/database.toml") + +First, we need to load our database using the ``build_db_from_json`` function +from ``astrodb-utils``. +This function takes in a settings file (in TOML format) that contains +information about our database, including its name. +The ``build_db_from_json`` function will preform a full rebuild of the +database from the JSON data files, +essentially reconstructing it from scratch. + + +Setting Up Your Data +-------------------- + +Often ingests are performed by reading in a file (e.g., csv) that contains a +table of data and then ingesting each row of the table into the database. +Therefore, it is important to convert your data into a format that is easy +to read in Python. + +.. 
code-block:: python + + L6T6_link = ( + "scripts/ingests/zjzhang/L6_to_T6_benchmarks08062025.csv" + ) + + L6T6_table = ascii.read( + L6T6_link, + format="csv", + data_start=1, + header_start=0, + guess=False, + fast_reader=False, + delimiter=",", + ) + +First, we define a variable that points to the location of our data file, +in which we then use to read in our data file as an Astropy Table. +Here, we specify that our file is in csv format and provide additional +parameters to ensure the file is read correctly. +For example, data_start and header_start specify which rows contain the data +and the header, respectively, while delimiter indicates that the file is +comma-separated. +The resulting ``L6T6_table`` variable is now an Astropy Table object that +contains all the data from the csv file, which we can then loop through +and ingest each row into the database. + + +Another Example Ingest Script +----------------------------- +Below is an example script for ingesting sources discovered by +Rojas et al. 2012 into the SIMPLE Archive from a .csv file +that has columns named `name`, `ra`, `dec`. + +.. 
code-block:: python + + from astropy.io import ascii + from astrodb-utils import build_db_from_json + from astrodb_utils.sources import ingest_source + from astrodb_utils.publications import ingest_publication + + DB_SAVE = True + + # Load the database + db = build_db_from_json(settings_file="path/to/database.toml") + + + def ingest_pubs(db): + # Ingest discovery publication + ingest_publication( + db, + doi="10.1088/0004-637X/748/2/93" + ) + + + def ingest_sources(db): + # read the csv data into an astropy table + data_table = ascii.read(file.csv, format="csv") + + n_added = 0 + n_skipped = 0 + + for source in data_table: + try: + ingest_source( + db, + source=data_table['name'], + ra=data_table['ra'], + dec=data_table['dec'], + reference="Roja12", + ) + logger.info(f"Source {source['name']} ingested.") + n_added += 1 + except AstroDBError as e: + logger.warning(f"Error ingesting source {source['name']}: {e}") + n_skipped += 1 + continue + + + ingest_pubs(db) + ingest_sources(db) + + logger.info(f"Added {n_added} sources, skipped {n_skipped} sources.") + + if DB_SAVE: + db.save() diff --git a/docs/pages/ingesting/ingest_scripts/writing_scripts.rst b/docs/pages/ingesting/ingest_scripts/writing_scripts.rst new file mode 100644 index 0000000..6a93ad1 --- /dev/null +++ b/docs/pages/ingesting/ingest_scripts/writing_scripts.rst @@ -0,0 +1,120 @@ +Writing Scripts +=============== + +When writing ingest scripts, there are two different ways to go about it: +using existing ingest functions from `astrodb_utils` or using sqlalchemy +commands. + + +Using Existing Ingest functions +------------------------------- +Using existing ingest functions helps streamline the process of writing an +ingest script. +However, only few ingest functions exist, namely for sources, names, and +instruments. +Therefore, if your data fits into one of these categories, it is recommended +to use the existing functions. 
+ +Below is an example of how to use the `ingest_source` function to ingest source +data into the database: + +.. code-block:: python + + for source in bones_sheet_table: + + ingest_source( + db, + source=source["NAME"], + reference=reference[1], + ra=source["RA"], + dec=source["DEC"], + raise_error=True, + search_db=True, + comment="Discovery reference from the BONES archive", + ) + +Note that the basic structure for any ingest is looping through each row of +your data table and appropriately ingesting each row into the database with +the relevant parameters. +Each ingest function will have different required and optional parameters, +so be sure to check the API documentation for more details. + + +Using SQLAlchemy Commands +------------------------- +If there is no existing ingest function for your data type, you can use +sqlalchemy commands to directly ingest into the database. + +Below is an example of how to ingest modeled parameters data into the database +using sqlalchemy commands: + +.. code-block:: python + + for row in L6T6_table: + with db.engine.connect() as conn: + conn.execute( + db.ModeledParameters.insert().values( + { + "source": L6T6_table["NAME"], + "model": L6T6_table["MODEL"], + "parameter": L6T6_table["PARAM"], + "value": L6T6_table["VAL"], + "upper_error": L6T6_table["UPP_ERR"], + "lower_error": L6T6_table["LOW_ERR"], + "unit": L6T6_table["UNIT"], + "comments": "Ingested from compilation by Zhang et al. (2020ApJ...891..171Z)", + "reference": L6T6_table["REF"] + } + ) + ) + conn.commit() + +Here, we follow the same format of looping through each row of our data table +and then using insert commands to add each row into the database. + +Since there is no existing ingest function, there are a few things to keep +note of. For example, make sure to change the table name after ``db.`` to the +appropriate table you are ingesting into. + +It is also important to reference the schema to ensure your code matches the +database structure. 
For example, make sure that the column names inside the +``values()`` method match exactly with the column names in the database schema. +Additionally, the schema, which is availible in your code under the utils +folder, will indicate which columns are required versus optional (check nullable +in the column you are referencing), so be sure to include all required columns in +your code to avoid any errors. Finally, make sure to commit the changes to the +database after executing the command with ``conn.commit()``. + +Logging Setup +------------- + +When working with data ingestion scripts or database-building workflows, +it's important to have a reliable way to understand what the script is +doing internally. +Python's built-in logging module provides a structured system for +reporting events, progress updates, and errors during execution. + +.. code-block:: python + + logger = logging.getLogger("AstroDB") + logger.setLevel(logging.INFO) + +By instantiating a logger for your script, it creates an easier way for you +to track what your script is doing: database loading, ingest errors, warnings, +etc. + +The line ``logger.setLevel(logging.INFO)`` configures the logger to display +only log messages at level INFO or higher. +Python provides multiple logging levels, including: + +* DEBUG: extremely detailed diagnostic output +* INFO: general runtime information +* WARNING: unexpected events that do not stop execution +* ERROR: serious problems that prevent part of the script from running +* CRITICAL: errors severe enough to stop execution entirely + +Database ingestion often involves multiple operations happening quickly, +so setting the level prevents you from being flooded with low-level +debugging messages. +This filters out unimportant information, making it easier to read and +facilitating the process of diagnosing ingestion problems or error messages. 
From fdcbd69a17d77b3caf5ae43c6e23d6265576616a Mon Sep 17 00:00:00 2001 From: kelle Date: Tue, 27 Jan 2026 18:09:46 -0500 Subject: [PATCH 2/7] link to API docs for functions --- docs/pages/ingesting/ingest_scripts/index.rst | 6 +++--- docs/pages/ingesting/ingest_scripts/writing_scripts.rst | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/pages/ingesting/ingest_scripts/index.rst b/docs/pages/ingesting/ingest_scripts/index.rst index eeab737..ca06150 100644 --- a/docs/pages/ingesting/ingest_scripts/index.rst +++ b/docs/pages/ingesting/ingest_scripts/index.rst @@ -21,11 +21,11 @@ Loading the Database db = build_db_from_json(settings_file = "path/to/database.toml") -First, we need to load our database using the ``build_db_from_json`` function -from ``astrodb-utils``. +First, we need to load our database using the +:py:func:`astrodb_utils.loaders.build_db_from_json` function. This function takes in a settings file (in TOML format) that contains information about our database, including its name. -The ``build_db_from_json`` function will preform a full rebuild of the +The ``build_db_from_json`` function will perform a full rebuild of the database from the JSON data files, essentially reconstructing it from scratch. diff --git a/docs/pages/ingesting/ingest_scripts/writing_scripts.rst b/docs/pages/ingesting/ingest_scripts/writing_scripts.rst index 6a93ad1..55f08ab 100644 --- a/docs/pages/ingesting/ingest_scripts/writing_scripts.rst +++ b/docs/pages/ingesting/ingest_scripts/writing_scripts.rst @@ -15,7 +15,8 @@ instruments. Therefore, if your data fits into one of these categories, it is recommended to use the existing functions. -Below is an example of how to use the `ingest_source` function to ingest source +Below is an example of how to use the +:py:func:`astrodb_utils.sources.ingest_source` function to ingest source data into the database: .. 
code-block:: python From 44b548dcf04339f5f5438fd91f425311e4d1b438 Mon Sep 17 00:00:00 2001 From: kelle Date: Tue, 27 Jan 2026 18:12:33 -0500 Subject: [PATCH 3/7] lint --- docs/pages/ingesting/ingest_scripts/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/pages/ingesting/ingest_scripts/index.rst b/docs/pages/ingesting/ingest_scripts/index.rst index ca06150..c964f16 100644 --- a/docs/pages/ingesting/ingest_scripts/index.rst +++ b/docs/pages/ingesting/ingest_scripts/index.rst @@ -75,7 +75,7 @@ that has columns named `name`, `ra`, `dec`. .. code-block:: python from astropy.io import ascii - from astrodb-utils import build_db_from_json + from astrodb_utils.loaders import build_db_from_json from astrodb_utils.sources import ingest_source from astrodb_utils.publications import ingest_publication From c00a2f13cc5957fa1183743b64e737696744ddc3 Mon Sep 17 00:00:00 2001 From: Kelle Cruz Date: Tue, 3 Feb 2026 16:11:20 -0500 Subject: [PATCH 4/7] Update docs/pages/ingesting/ingest_scripts/index.rst Co-authored-by: David Rodriguez --- docs/pages/ingesting/ingest_scripts/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/pages/ingesting/ingest_scripts/index.rst b/docs/pages/ingesting/ingest_scripts/index.rst index c964f16..104bc30 100644 --- a/docs/pages/ingesting/ingest_scripts/index.rst +++ b/docs/pages/ingesting/ingest_scripts/index.rst @@ -17,7 +17,7 @@ Loading the Database .. 
code-block:: python - from astrodb-utils import build_db_from_json + from astrodb_utils import build_db_from_json db = build_db_from_json(settings_file = "path/to/database.toml") From 0d83b5cfc36acc9793adde987e0ff6c2900b0e1b Mon Sep 17 00:00:00 2001 From: Kelle Cruz Date: Tue, 3 Feb 2026 16:11:42 -0500 Subject: [PATCH 5/7] Update docs/pages/ingesting/ingest_scripts/writing_scripts.rst Co-authored-by: David Rodriguez --- docs/pages/ingesting/ingest_scripts/writing_scripts.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/pages/ingesting/ingest_scripts/writing_scripts.rst b/docs/pages/ingesting/ingest_scripts/writing_scripts.rst index 55f08ab..29e692f 100644 --- a/docs/pages/ingesting/ingest_scripts/writing_scripts.rst +++ b/docs/pages/ingesting/ingest_scripts/writing_scripts.rst @@ -80,7 +80,7 @@ appropriate table you are ingesting into. It is also important to reference the schema to ensure your code matches the database structure. For example, make sure that the column names inside the ``values()`` method match exactly with the column names in the database schema. -Additionally, the schema, which is availible in your code under the utils +Additionally, the schema, which is available in your code under the utils folder, will indicate which columns are required versus optional (check nullable in the column you are referencing), so be sure to include all required columns in your code to avoid any errors. 
Finally, make sure to commit the changes to the From 7063b8967bc8e2ab2d7721e446a52f92f7f775f7 Mon Sep 17 00:00:00 2001 From: Kelle Cruz Date: Tue, 3 Feb 2026 16:11:50 -0500 Subject: [PATCH 6/7] Update docs/pages/ingesting/ingest_scripts/writing_scripts.rst Co-authored-by: David Rodriguez --- docs/pages/ingesting/ingest_scripts/writing_scripts.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/pages/ingesting/ingest_scripts/writing_scripts.rst b/docs/pages/ingesting/ingest_scripts/writing_scripts.rst index 29e692f..400f494 100644 --- a/docs/pages/ingesting/ingest_scripts/writing_scripts.rst +++ b/docs/pages/ingesting/ingest_scripts/writing_scripts.rst @@ -97,6 +97,7 @@ reporting events, progress updates, and errors during execution. .. code-block:: python + import logging logger = logging.getLogger("AstroDB") logger.setLevel(logging.INFO) From 613ba3c3ae2679c2de19004287454aa150a54f6c Mon Sep 17 00:00:00 2001 From: kelle Date: Tue, 3 Feb 2026 16:37:36 -0500 Subject: [PATCH 7/7] improve example ingest script --- docs/pages/ingesting/ingest_scripts/index.rst | 44 ++++++++++++------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/docs/pages/ingesting/ingest_scripts/index.rst b/docs/pages/ingesting/ingest_scripts/index.rst index 104bc30..52b261e 100644 --- a/docs/pages/ingesting/ingest_scripts/index.rst +++ b/docs/pages/ingesting/ingest_scripts/index.rst @@ -8,8 +8,9 @@ Ingest Scripts writing_scripts Ingest scripts can be used to add a bunch of data to the database at once. -Often ingests are performed by reading in a file (e.g., csv) that contains -a table of data and then ingesting each row of the table into the database. +Ingest scripts also aid in reproducibility since they document exactly how +data was added to the database. +They can also be reused later to add similar data. 
Loading the Database @@ -35,11 +36,16 @@ Setting Up Your Data Often ingests are performed by reading in a file (e.g., csv) that contains a table of data and then ingesting each row of the table into the database. -Therefore, it is important to convert your data into a format that is easy -to read in Python. +Therefore, it is important to read your data into a format that is easy +to work with, such as an `Astropy Table <https://docs.astropy.org/en/stable/table/index.html>`_ +or pandas DataFrame. + +Here is an example of reading in a csv file using Astropy's ascii module: .. code-block:: python + from astropy.io import ascii + L6T6_link = ( "scripts/ingests/zjzhang/L6_to_T6_benchmarks08062025.csv" ) @@ -65,6 +71,8 @@ The resulting ``L6T6_table`` variable is now an Astropy Table object that contains all the data from the csv file, which we can then loop through and ingest each row into the database. +There are many ways to read in data files in Python, so feel free to use +other libraries or methods that you are comfortable with, such as pandas. Another Example Ingest Script ----------------------------- @@ -79,11 +87,15 @@ that has columns named `name`, `ra`, `dec`. from astrodb_utils.sources import ingest_source from astrodb_utils.publications import ingest_publication - DB_SAVE = True + DB_SAVE = False # Set to True once script can run without errors and all sources can be ingested # Load the database db = build_db_from_json(settings_file="path/to/database.toml") + # Set the logger setting to control how much output is shown + import logging + logger = logging.getLogger("astrodb_utils") + logger.setLevel(logging.INFO) # Set to DEBUG for more verbosity def ingest_pubs(db): # Ingest discovery publication @@ -101,26 +113,24 @@ that has columns named `name`, `ra`, `dec`. 
n_skipped = 0 for source in data_table: - try: - ingest_source( - db, - source=data_table['name'], - ra=data_table['ra'], - dec=data_table['dec'], - reference="Roja12", - ) - logger.info(f"Source {source['name']} ingested.") - n_added += 1 + ingest_source( + db, + source=data_table['name'], + ra=data_table['ra'], + dec=data_table['dec'], + reference="Roja12", + raise_error=True, + ) + n_added += 1 except AstroDBError as e: logger.warning(f"Error ingesting source {source['name']}: {e}") n_skipped += 1 continue + print(f"Added {n_added} sources, skipped {n_skipped} sources.") ingest_pubs(db) ingest_sources(db) - logger.info(f"Added {n_added} sources, skipped {n_skipped} sources.") - if DB_SAVE: db.save()