Skip to content
This repository was archived by the owner on Jan 26, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,12 @@ fast even for huge files.

![](images/visual-demo.png)


## Installation

```bash
pip install fastapi-csv
```


## How to use it

### 1. From the command line
Expand All @@ -29,16 +27,15 @@ an API for it, run one of:

```bash
# from file
fastapi-csv people.csv
fastapi-csv dictionary.csv --delimiter "\t"

# directly from URL
fastapi-csv https://raw.githubusercontent.com/jrieke/fastapi-csv/main/people.csv

# With options : specify the delimiter character in the CSV, the host and port for the server
fastapi-csv people.csv --delimiter "|" --host "127.0.0.1" --port 1234
fastapi-csv dictionary.csv --delimiter "|" --host "127.0.0.1" --port 1234

```

Either command should start a fastapi instance, which has auto-generated endpoints and
query parameters based on the CSV file. Here, the API has an endpoint `/people`
(same name as the file), which can be queried using the CSV's column names, e.g. you can
Expand All @@ -57,7 +54,6 @@ types, e.g.
Check out the API docs for more information and an interactive demo, they should be at
http://127.0.0.1:8000/docs


### 2. From Python

Create a file `my_file.py`:
Expand Down Expand Up @@ -85,11 +81,9 @@ all the stuff you can do with a normal fastapi instance, e.g. add a new endpoint
def hello(self):
return {"Hello:", "World"}
```

In the future, you will also be able to easily modify existing endpoints that were
generated from the CSV file.


**Updating the data**

If your CSV file changes, you can update the API data with:
Expand Down
6 changes: 6 additions & 0 deletions dictionary.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
TL,TLMorph,SL,SLMorph,POS,Domain
Auto,m,car,,noun,general
Mutter,f,mother,,noun,general
Mutter,f,screw,,noun,technical
Leben,n,life,,noun,general
leben,trv;intrv,to live,,verb,general
84 changes: 53 additions & 31 deletions fastapi_csv/applications.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
"""
Contains the main `FastAPI_CSV` class, which wraps `FastAPI`.
"""

import os
from typing import Union, Type
from pathlib import Path
# Contains the main `FastAPI_CSV` class, which wraps `FastAPI`.
import inspect
import logging

from fastapi import FastAPI
import fastapi
import pandas as pd
import re
import sqlite3
from pathlib import Path
from typing import Union, Type
import fastapi
import numpy as np
import pandas as pd
import pydantic
from fastapi import FastAPI

def is_date_string(string: str) -> bool:
"""Check if a string is a date string."""
return re.match(r"^([0-9]{4})-(?:[0-9]{2})-([0-9]{2})$", string) is not None


def create_query_param(name: str, type_: Type, default) -> pydantic.fields.ModelField:
Expand All @@ -29,62 +29,58 @@ def create_query_param(name: str, type_: Type, default) -> pydantic.fields.Model
)
return field


def dtype_to_type(dtype) -> Type:
"""Convert numpy/pandas dtype to normal Python type."""
if dtype == np.object:
return str
else:
return type(np.zeros(1, dtype).item())


class FastAPI_CSV(FastAPI):
# TODO: Implement a way to modify auto-generated endpoints, e.g. by
#
# @app.modify("/people")
# def modify_people(results: List, new_query_param: str = "foo"):
#
# # `results` are the dicts/json that are normally returned by the endpoint.
# # Modify them as you like.
# results.append({"Hello": "World"})
#
# # Any additional function args (like `new_query_param`) are added as p
# # arameters to the endpoint, just like in normal fastapi.
# results.append({"Hello": new_query_param})
#
# # You can also do a manual query on the database that's created from the CSV.
# rows = app.query_database(f"SELECT * FROM people WHERE first_name={new_query_param}")
# results.append(rows)
#
# # Return modified results so they get passed to the user.
# return results

def __init__(self, csv_path: Union[str, Path], delimiter: Union[str, str]) -> None:
"""
Initializes a FastAPI instance that serves data from a CSV file.

Args:
csv_path (Union[str, Path]): The path to the CSV file, can also be a URL
"""
super().__init__()

# Read CSV file to pandas dataframe and create sqlite3 database from it.
self.csv_path = csv_path
self.delimiter = delimiter
self.table_name = Path(self.csv_path).stem
self.table_name = Path(self.csv_path).stem.replace('-', '_')
self.con = None
self.delimiter = delimiter
df = self.update_database()

# Add an endpoint for the CSV file with one query parameter for each column.
# We hack into fastapi a bit here to inject the query parameters at runtime
# based on the column names/types.

# We hack into fastapi a bit here to inject the query parameters at runtime based on the column names/types.
# First, define a generic endpoint method, which queries the database.
def generic_get(**kwargs):
selected_cols = []
where_clauses = []
use_distinct = False
for name, val in kwargs.items():
if val is not None:
if name.endswith("_greaterThan"):
if name == "use_distinct":
use_distinct = True
elif name.endswith("_selected"):
selected_cols.append(name[:-9])
elif name.endswith("_greaterThan"):
where_clauses.append(f"{name[:-12]}>{val}")
elif name.endswith("_greaterThanEqual"):
where_clauses.append(f"{name[:-17]}>={val}")
Expand All @@ -94,6 +90,14 @@ def generic_get(**kwargs):
where_clauses.append(f"{name[:-14]}<={val}")
elif name.endswith("_contains"):
where_clauses.append(f"instr({name[:-9]}, '{val}') > 0")
elif name.endswith("_like"):
where_clauses.append(f"{name[:-5]} LIKE '{val}'")
elif name.endswith("_regex"):
where_clauses.append(f"{name[:-6]} REGEXP '{val}'")
elif name.endswith("_isBefore"):
where_clauses.append(f"DATE({name[:-9]}) < DATE('{val}')")
elif name.endswith("_isAfter"):
where_clauses.append(f"DATE({name[:-8]}) > DATE('{val}')")
else:
if isinstance(val, str):
val = f"'{val}'"
Expand All @@ -103,7 +107,8 @@ def generic_get(**kwargs):
else:
where = ""

sql_query = f"SELECT * FROM {self.table_name} {where}"
selection = ','.join(selected_cols)
sql_query = f"SELECT {'DISTINCT' if use_distinct else ''} {selection if len(selected_cols) else '*'} FROM {self.table_name} {where}"
dicts = self.query_database(sql_query)
return dicts

Expand All @@ -114,17 +119,27 @@ def generic_get(**kwargs):
# Remove all auto-generated query parameters (=one for `kwargs`).
self._clear_query_params(route_path)

# Add use_distinct query param
self._add_query_param(route_path, 'use_distinct', bool)

# Add new query parameters based on column names and data types.
for col, dtype in zip(df.columns, df.dtypes):
type_ = dtype_to_type(dtype)
self._add_query_param(route_path, col, type_)
# Use as a flag to select only given columns
self._add_query_param(route_path, col + "_selected", bool)
if type_ in (int, float):
self._add_query_param(route_path, col + "_greaterThan", type_)
self._add_query_param(route_path, col + "_greaterThanEqual", type_)
self._add_query_param(route_path, col + "_lessThan", type_)
self._add_query_param(route_path, col + "_lessThanEqual", type_)
elif type_ == str:
self._add_query_param(route_path, col + "_contains", type_)
self._add_query_param(route_path, col + "_like", type_)
self._add_query_param(route_path, col + "_regex", type_)
if is_date_string(df[col].iloc[df[col].first_valid_index()]):
self._add_query_param(route_path, col + "_isBefore", type_)
self._add_query_param(route_path, col + "_isAfter", type_)

def query_database(self, sql_query):
"""Executes a SQL query on the database and returns rows as list of dicts."""
Expand All @@ -135,8 +150,8 @@ def query_database(self, sql_query):

def delete_database(self):
"""
Deletes the database with all data read from the CSV.

Deletes the database with all data read from the CSV.
The CSV file is not deleted of course. The API endpoints are also not affected,
so you can use `update_data` to read in new data.
"""
Expand All @@ -150,9 +165,9 @@ def delete_database(self):
def update_database(self):
"""
Updates the database with the current data from the CSV file.

Note that this only affects the database, not the endpoints. If the column names
and/or data types in the CSV change (and you want that to update in the
and/or data types in the CSV change (and you want that to update in the
endpoints as well), you need to create a new FastAPI_CSV object.
"""
self.delete_database()
Expand All @@ -165,7 +180,7 @@ def update_database(self):

# Download excel file from Google Sheets, read it with pandas and write to
# database.
df = pd.read_csv(self.csv_path, delimiter=self.delimiter)
df = pd.read_csv(self.csv_path, delimiter=self.delimiter, engine ='python')
self.con = sqlite3.connect(":memory:", check_same_thread=False)
df.to_sql(self.table_name, self.con)

Expand All @@ -180,6 +195,13 @@ def dict_factory(cursor, row):
self.con.row_factory = dict_factory
logging.info("Database successfully updated")

# Implement the REGEXP operator in SQLite.
def regexp(expr, item):
if item is None:
return False
reg = re.compile(expr)
return reg.search(item) is not None
self.con.create_function("REGEXP", 2, regexp)
return df

def _find_route(self, route_path):
Expand Down
Binary file removed images/visual-demo.png
Binary file not shown.
6 changes: 0 additions & 6 deletions people.csv

This file was deleted.

12 changes: 6 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
numpy==1.19.4
pydantic==1.6.1
pandas==1.1.3
uvicorn==0.13.3
fastapi==0.63.0
typer==0.3.2
numpy>=1.19.4
pydantic>=1.6.1
pandas>=1.1.3
uvicorn>=0.13.3
fastapi>=0.63.0
typer>=0.3.2