diff --git a/.gitignore b/.gitignore index c645042..601c738 100644 --- a/.gitignore +++ b/.gitignore @@ -190,4 +190,7 @@ project_podcast_summarizer/data/summaries/*.json project_podcast_summarizer/data/transcripts/mini.txt # fine tuning -*.jsonl \ No newline at end of file +*.jsonl + +# Projects +project_rag/app/index_store/*.json \ No newline at end of file diff --git a/project_rag/README.md b/project_rag/README.md new file mode 100644 index 0000000..b189681 --- /dev/null +++ b/project_rag/README.md @@ -0,0 +1,41 @@ +## New Additions + +- Data Engineering Now Generates the Index Store +- App now loads the index store +- Chatbot Queries now operate with RAG + +## New Project Local Setup +- cd into project_rag directory +- Create new virtualenv and activate it +- pip install -r requirements.txt +- PYTHONPATH=../ python data_engineering/rag_index_generator.py # note this uses the parent directory for PYTHONPATH + +If you get: +```shell +Traceback (most recent call last): + File "/PATH/TO/REPO/ai-engineering-course/project_rag/data_engineering/rag_index_generator.py", line 12, in + from shared.settings import DATA_DIR, BASE_DIR +ModuleNotFoundError: No module named 'shared' +``` +It is because you have not appended the repo root directory to your PYTHONPATH environment variable + +## Code New Additions +- RAG index generation (offline) +- New endpoint with chat inference (online) (incl. new schema) +- New chatbot template (online) +- RAG capabilities for this endpoint (online + deps.py) +- Split out config and New evar + + +## Project Local Setup (Same as previous project) + +1. cd into project directory & create virtualenv & activate it +2. `pip install -r requirements.txt` +3. Run the DB migrations `PYTHONPATH=. python prestart.py` (only required once) +4. Run the FastAPI server Python command: `PYTHONPATH=. python app/main.py` +6. 
Open http://localhost:8001/ + + +## Troubleshooting +`ModuleNotFoundError: No module named 'project_rag'` - means that you need to add the +`project_rag` directory to your PYTHONPATH. \ No newline at end of file diff --git a/project_rag/alembic.ini b/project_rag/alembic.ini new file mode 100755 index 0000000..921aaf1 --- /dev/null +++ b/project_rag/alembic.ini @@ -0,0 +1,71 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. 
When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/project_rag/alembic/README b/project_rag/alembic/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/project_rag/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/project_rag/alembic/env.py b/project_rag/alembic/env.py new file mode 100644 index 0000000..5f346ec --- /dev/null +++ b/project_rag/alembic/env.py @@ -0,0 +1,91 @@ +from __future__ import with_statement +from sqlalchemy.ext.asyncio import create_async_engine, AsyncEngine +from sqlalchemy.engine import Connection +import asyncio + +from alembic import context +from sqlalchemy import engine_from_config, pool +from logging.config import fileConfig + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. 
+fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +# target_metadata = None + +from app.db.base_class import Base # noqa +from app.db.session import SQLALCHEMY_DATABASE_URI +from app.models.podcast import Podcast, Episode, Summary + +target_metadata = Base.metadata + + +def get_url(): + return SQLALCHEMY_DATABASE_URI + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = get_url() + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True, compare_type=True + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection: Connection) -> None: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + +async def run_async_migrations() -> None: + """In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + + configuration = config.get_section(config.config_ini_section) + configuration["sqlalchemy.url"] = get_url() + + # Using create_async_engine instead of engine_from_config + connectable: AsyncEngine = create_async_engine( + configuration["sqlalchemy.url"], + echo=True, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + + asyncio.run(run_async_migrations()) + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/project_rag/alembic/script.py.mako b/project_rag/alembic/script.py.mako new file mode 100644 index 0000000..2c01563 --- /dev/null +++ b/project_rag/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/project_rag/alembic/versions/728c3af72e3d_initial_tables.py b/project_rag/alembic/versions/728c3af72e3d_initial_tables.py new file mode 100644 index 0000000..3dca5d8 --- /dev/null +++ b/project_rag/alembic/versions/728c3af72e3d_initial_tables.py @@ -0,0 +1,51 @@ +"""initial tables + +Revision ID: 728c3af72e3d +Revises: +Create Date: 2024-02-24 17:02:03.549236 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '728c3af72e3d' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('podcast', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('episode', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('title', sa.String(length=255), nullable=False), + sa.Column('url', sa.String(length=512), nullable=True), + sa.Column('podcast_id', sa.Integer(), nullable=False), + sa.Column('transcript', sa.Text(), nullable=True), + sa.ForeignKeyConstraint(['podcast_id'], ['podcast.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('summary', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('content', sa.Text(), nullable=True), + sa.Column('episode_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['episode_id'], ['episode.id'], ), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('episode_id') + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('summary') + op.drop_table('episode') + op.drop_table('podcast') + # ### end Alembic commands ### diff --git a/project_rag/app/__init__.py b/project_rag/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/project_rag/app/config.py b/project_rag/app/config.py new file mode 100644 index 0000000..962d39c --- /dev/null +++ b/project_rag/app/config.py @@ -0,0 +1,56 @@ +import pathlib +from typing import Optional + +from dotenv import load_dotenv +from pydantic_settings import BaseSettings + +load_dotenv() + +# Project Directories +ROOT: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent +PROMPT_DIR: pathlib.Path = ROOT / 'data_engineering' / 'prompts' +TRANSCRIPT_DIR: pathlib.Path = ROOT / 'data' / 'transcripts' +SUMMARY_DIR: pathlib.Path = ROOT / 'data' / 'summaries' +MODEL_DIR: pathlib.Path = ROOT / 'data_engineering' / 'models' +INDEX_DIR: pathlib.Path = ROOT / "app" / "index_store" + +class LLMSettings(BaseSettings): + """ + Defines the settings for the Large Language Model (LLM). + + Attributes: + CONTEXT_WINDOW (int): The context window size for the LLM. + N_GPU_LAYERS (int): The number of GPU layers to be used by the LLM. + MAX_TOKENS (int): The maximum number of tokens to be generated in one response. + TEMPERATURE (float): The temperature setting for the LLM's creativity in responses. + MODEL (str): The identifier for the LLM model to be used. + TOGETHER_API_KEY (str): The API key for accessing the LLM, expected to be loaded from the environment. + """ + CONTEXT_WINDOW: int = 16000 + N_GPU_LAYERS: int = 1 + MAX_TOKENS: int = 512 + TEMPERATURE: float = 0.8 + MODEL: str = "mistralai/Mixtral-8x7B-Instruct-v0.1" + TOGETHER_API_KEY: str # picked up from environment + +class Settings(BaseSettings): + """ + Configuration settings for the application, including database and LLM configurations. + + Attributes: + SQLALCHEMY_DATABASE_URI (Optional[str]): The database connection URI. 
+ llm (LLMSettings): Nested settings for configuring the Large Language Model. + """ + SQLALCHEMY_DATABASE_URI: Optional[str] = "sqlite:///example.db" + llm: LLMSettings = LLMSettings() + + class Config: + """ + Configuration class for settings. + + Attributes: + case_sensitive (bool): Specifies if the configuration keys should be case-sensitive. + """ + case_sensitive: bool = True + +settings: Settings = Settings() diff --git a/project_rag/app/crud/__init__.py b/project_rag/app/crud/__init__.py new file mode 100644 index 0000000..79c9a50 --- /dev/null +++ b/project_rag/app/crud/__init__.py @@ -0,0 +1 @@ +from .crud_podcast import podcast, episode, summary diff --git a/project_rag/app/crud/base.py b/project_rag/app/crud/base.py new file mode 100644 index 0000000..3f0c5ce --- /dev/null +++ b/project_rag/app/crud/base.py @@ -0,0 +1,75 @@ +from typing import Any, Dict, Generic, List, Optional, Type, TypeVar, Union + +from fastapi.encoders import jsonable_encoder +from pydantic import BaseModel +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select, delete +from sqlalchemy.future import select +from sqlalchemy.exc import SQLAlchemyError + +from app.db.base_class import Base # Adjust import to match your project structure + +ModelType = TypeVar("ModelType", bound=Base) +CreateSchemaType = TypeVar("CreateSchemaType", bound=BaseModel) +UpdateSchemaType = TypeVar("UpdateSchemaType", bound=BaseModel) + +class CRUDBase(Generic[ModelType, CreateSchemaType, UpdateSchemaType]): + def __init__(self, model: Type[ModelType]): + self.model = model + + async def get(self, db: AsyncSession, id: Any) -> Optional[ModelType]: + stmt = select(self.model).filter(self.model.id == id) + result = await db.execute(stmt) + return result.scalars().first() + + async def get_multi( + self, db: AsyncSession, *, skip: int = 0, limit: int = 100 + ) -> List[ModelType]: + stmt = select(self.model).offset(skip).limit(limit) + results = await db.execute(stmt) + return 
results.scalars().all() + + async def create(self, db: AsyncSession, *, obj_in: CreateSchemaType) -> ModelType: + obj_in_data = jsonable_encoder(obj_in) + db_obj = self.model(**obj_in_data) + db.add(db_obj) + try: + await db.commit() + await db.refresh(db_obj) + except SQLAlchemyError as e: + await db.rollback() + raise e + return db_obj + + async def update( + self, + db: AsyncSession, + *, + db_obj: ModelType, + obj_in: Union[UpdateSchemaType, Dict[str, Any]] + ) -> ModelType: + obj_data = jsonable_encoder(obj_in) + if isinstance(obj_in, dict): + update_data = obj_in + else: + update_data = obj_in.dict(exclude_unset=True) + for field in update_data: + if field in obj_data: + setattr(db_obj, field, update_data[field]) + try: + await db.commit() + await db.refresh(db_obj) + except SQLAlchemyError as e: + await db.rollback() + raise e + return db_obj + + async def remove(self, db: AsyncSession, *, id: int) -> Optional[int]: + stmt = delete(self.model).where(self.model.id == id) + try: + await db.execute(stmt) + await db.commit() + return id + except SQLAlchemyError as e: + await db.rollback() + raise e diff --git a/project_rag/app/crud/crud_podcast.py b/project_rag/app/crud/crud_podcast.py new file mode 100644 index 0000000..7220d28 --- /dev/null +++ b/project_rag/app/crud/crud_podcast.py @@ -0,0 +1,48 @@ +# app/crud/crud_podcast.py +import json +from typing import Any + +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select +from sqlalchemy.orm import selectinload + +from app.crud.base import CRUDBase +from app.models.podcast import Podcast, Episode, Summary +from app.schemas.podcast import PodcastCreate, PodcastUpdate, SummaryCreate, SummaryUpdate, EpisodeCreate, EpisodeUpdate + + +class CRUDPodcast(CRUDBase[Podcast, PodcastCreate, PodcastUpdate]): + pass + +podcast = CRUDPodcast(Podcast) + + +class CRUDEpisode(CRUDBase[Episode, EpisodeCreate, EpisodeUpdate]): + async def get(self, db: AsyncSession, id: Any) -> Episode | None: + 
stmt = select(Episode).options(selectinload(Episode.summary)).filter(Episode.id == id) + result = await db.execute(stmt) + episode = result.scalars().first() + + if episode and episode.summary and episode.summary.content: + # Assuming summary.content is a stringified JSON, parse it into a Python object + episode.summary.content = json.loads(episode.summary.content) + + return episode + + async def get_multi(self, db: AsyncSession, *, skip: int = 0, limit: int = 100) -> list[Episode]: + stmt = select(Episode).options(selectinload(Episode.summary)).offset(skip).limit(limit) + results = await db.execute(stmt) + episodes = results.scalars().all() + for episode in episodes: + if episode.summary and episode.summary.content: + # Assuming summary.content is a stringified JSON + # n.b. this is very inefficient + episode.summary.content = json.loads(episode.summary.content) + return episodes + +episode = CRUDEpisode(Episode) + +class CRUDSummary(CRUDBase[Summary, SummaryCreate, SummaryUpdate]): + pass + +summary = CRUDSummary(Summary) \ No newline at end of file diff --git a/project_rag/app/db/__init__.py b/project_rag/app/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/project_rag/app/db/backend_pre_start.py b/project_rag/app/db/backend_pre_start.py new file mode 100644 index 0000000..03e0770 --- /dev/null +++ b/project_rag/app/db/backend_pre_start.py @@ -0,0 +1,27 @@ +import logging +import asyncio + +from app.db.session import AsyncSessionLocal + +from sqlalchemy import text + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +async def init() -> None: + async with AsyncSessionLocal() as db: + try: + # Try to create session to check if DB is awake + await db.execute(text("SELECT 1")) + await db.commit() # Ensure any transaction is committed. 
+ except Exception as e: + logger.error(e) + raise e + +async def main() -> None: + logger.info("Initializing service") + await init() + logger.info("Service finished initializing") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/project_rag/app/db/base.py b/project_rag/app/db/base.py new file mode 100644 index 0000000..79d1898 --- /dev/null +++ b/project_rag/app/db/base.py @@ -0,0 +1,3 @@ +# Import all the models, so that Base has them before being +# imported by Alembic +from app.models.podcast import Podcast, Episode, Summary # noqa \ No newline at end of file diff --git a/project_rag/app/db/base_class.py b/project_rag/app/db/base_class.py new file mode 100644 index 0000000..860e542 --- /dev/null +++ b/project_rag/app/db/base_class.py @@ -0,0 +1,3 @@ +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() diff --git a/project_rag/app/db/init_db.py b/project_rag/app/db/init_db.py new file mode 100644 index 0000000..9597f15 --- /dev/null +++ b/project_rag/app/db/init_db.py @@ -0,0 +1,73 @@ +import logging + +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select + +from app.config import TRANSCRIPT_DIR +from app.db.base_class import Base # noqa: F401 +from app.models.podcast import Podcast, Episode + +logger = logging.getLogger(__name__) + +# make sure all SQL Alchemy models are imported (app.db.base) before initializing DB +# otherwise, SQL Alchemy might fail to initialize relationships properly +# for more details: https://github.com/tiangolo/full-stack-fastapi-postgresql/issues/28 + + +async def init_db(db: AsyncSession) -> None: + # Tables should be created with Alembic migrations + # But if you don't want to use migrations, create + # the tables uncommenting the next line + # Base.metadata.create_all(bind=engine) + + # Check if the podcast already exists + result = await db.execute(select(Podcast).where(Podcast.name == "Developer Tea")) + podcast = 
result.scalars().first() + + # Create a new podcast if it doesn't exist + if not podcast: + podcast = Podcast(name="Developer Tea") + db.add(podcast) + await db.flush() # Flushing is necessary to populate the podcast with an ID before committing + + episodes_data = [ + { + "title": "Finding Leverage by Escaping Functional Fixedness", + "url": "https://podcasts.apple.com/gb/podcast/finding-leverage-by-escaping-functional-fixedness/id955596067?i=1000643042262", + "transcript_file": "developer_tea_episode_1191.txt" # Adjust filename as necessary + }, + { + "title": "9 Years Persistence by Reducing Expectation", + "url": "https://podcasts.apple.com/gb/podcast/9-years-persistence-by-reducing-expectation/id955596067?i=1000640625251", + "transcript_file": "developer_tea_episode_1191.txt" # Adjust filename as necessary + }, + # Add more episodes as needed + ] + # Create episodes with transcripts + episodes = [] + for entry in episodes_data: + try: + transcript_path = TRANSCRIPT_DIR / entry["transcript_file"] + with open(transcript_path, 'r', encoding='utf-8') as file: + transcript = file.read() + except IOError as e: + logger.error(f"Failed to read transcript file {entry['transcript_file']}: {e}") + transcript = None # Use None or some placeholder text if the transcript cannot be read + + episode = Episode( + title=entry["title"], + url=entry["url"], + podcast=podcast, + transcript=transcript + ) + episodes.append(episode) + + # Add the episodes to the session + db.add_all(episodes) + await db.flush() # Flushing is necessary to populate the episodes with IDs before committing + + # Commit the changes + await db.commit() + + # Close the session + await db.close() diff --git a/project_rag/app/db/session.py b/project_rag/app/db/session.py new file mode 100644 index 0000000..0ed61bb --- /dev/null +++ b/project_rag/app/db/session.py @@ -0,0 +1,18 @@ +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession +from sqlalchemy.orm import sessionmaker + 
+SQLALCHEMY_DATABASE_URI = "sqlite+aiosqlite:///example.db" + +engine = create_async_engine( + SQLALCHEMY_DATABASE_URI, + # No need for "check_same_thread" in async mode since aiosqlite handles async access + echo=True, # Optional, for debugging +) + +# AsyncSession configuration +AsyncSessionLocal = sessionmaker( + expire_on_commit=False, + autoflush=False, + bind=engine, + class_=AsyncSession, +) diff --git a/project_rag/app/db/update_db.py b/project_rag/app/db/update_db.py new file mode 100644 index 0000000..c2855ca --- /dev/null +++ b/project_rag/app/db/update_db.py @@ -0,0 +1,79 @@ +import json +import logging + +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select + +from app.config import TRANSCRIPT_DIR, SUMMARY_DIR +from app.db.base_class import Base # noqa: F401 +from app.models.podcast import Podcast, Episode, Summary + +logger = logging.getLogger(__name__) + +# make sure all SQL Alchemy models are imported (app.db.base) before initializing DB +# otherwise, SQL Alchemy might fail to initialize relationships properly +# for more details: https://github.com/tiangolo/full-stack-fastapi-postgresql/issues/28 + + +async def update_db(db: AsyncSession) -> None: + # Tables should be created with Alembic migrations + # But if you don't want to use migrations, create + # the tables uncommenting the next line + # Base.metadata.create_all(bind=engine) + + # Check if the podcast already exists + result = await db.execute(select(Podcast).where(Podcast.name == "Developer Tea")) + podcast = result.scalars().first() + + # Create a new podcast if it doesn't exist + if not podcast: + podcast = Podcast(name="Developer Tea") + db.add(podcast) + await db.flush() # Flushing is necessary to populate the podcast with an ID before committing + + episodes_data = [ + { + "title": "Finding Leverage by Escaping Functional Fixedness", + "summary_file": "developer_tea_episode_1191_summary.json" + }, + { + "title": "9 Years Persistence by Reducing 
Expectation", + "summary_file": "9_years_persistence_by_reducing_expectation_summary.json" + }, + # Add more episodes as needed + ] + + # Iterate over episodes to read summaries and update + for entry in episodes_data: + # Read the summary file + try: + summary_path = SUMMARY_DIR / entry["summary_file"] + with open(summary_path, 'r', encoding='utf-8') as file: + summary_text = file.read() + except IOError as e: + logger.error(f"Failed to read summary file {entry['summary_file']}: {e}") + continue # Skip this episode if summary file can't be read + + # Fetch the corresponding episode from the database + episode_result = await db.execute(select(Episode).where(Episode.title == entry['title'], Episode.podcast_id == podcast.id)) + episode = episode_result.scalars().first() + + if episode: + # This check might cause lazy-loading outside an async context + # if episode.summary: + + # Instead, directly check if a summary exists by attempting to fetch it + summary_result = await db.execute(select(Summary).where(Summary.episode_id == episode.id)) + summary = summary_result.scalars().first() + + if summary: + summary.content = json.dumps(summary_text) # Serialize the Python dict to a JSON string + else: + new_summary = Summary(content=summary_text, episode_id=episode.id) + db.add(new_summary) + + # Commit the changes to the database + await db.commit() + + # Close the session + await db.close() \ No newline at end of file diff --git a/project_rag/app/deps.py b/project_rag/app/deps.py new file mode 100644 index 0000000..3c56db8 --- /dev/null +++ b/project_rag/app/deps.py @@ -0,0 +1,28 @@ +from typing import AsyncGenerator +from sqlalchemy.ext.asyncio import AsyncSession + +from llama_index.llms.together import TogetherLLM + +from app.config import settings +from app.db.session import AsyncSessionLocal + +async def get_db() -> AsyncGenerator[AsyncSession, None]: + """ + Asynchronous generator that provides a database session. 
+ + This function asynchronously provides a scoped session for interacting with the database. It ensures that the session + is correctly closed after use, even in the event of errors, by using an async context manager. + + Yields: + AsyncSession: The SQLAlchemy asynchronous session object for database operations. + """ + async with AsyncSessionLocal() as db: + yield db + +def get_llm() -> TogetherLLM: + return TogetherLLM( + model=settings.llm.MODEL, + api_key=settings.llm.TOGETHER_API_KEY, + max_tokens=settings.llm.MAX_TOKENS, + context_window=settings.llm.CONTEXT_WINDOW + ) diff --git a/project_rag/app/index_store/.gitkeep b/project_rag/app/index_store/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/project_rag/app/initial_data.py b/project_rag/app/initial_data.py new file mode 100644 index 0000000..8cb240e --- /dev/null +++ b/project_rag/app/initial_data.py @@ -0,0 +1,23 @@ +import asyncio +import logging + +from app.db.init_db import init_db +from app.db.session import AsyncSessionLocal + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +async def init() -> None: + async with AsyncSessionLocal() as db: + await init_db(db) + + +async def main() -> None: + logger.info("Creating initial data") + await init() + logger.info("Initial data created") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/project_rag/app/main.py b/project_rag/app/main.py new file mode 100644 index 0000000..1155f7f --- /dev/null +++ b/project_rag/app/main.py @@ -0,0 +1,101 @@ +from contextlib import asynccontextmanager +from pathlib import Path +from typing import Any, AsyncGenerator, Dict + +from fastapi import FastAPI, APIRouter, HTTPException, Request, Depends +from fastapi.responses import StreamingResponse +from fastapi.templating import Jinja2Templates +from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex +from llama_index.core.indices.base import BaseIndex, BaseQueryEngine +from 
llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.llms.openai_like import OpenAILike +from sqlalchemy.ext.asyncio import AsyncSession + +from app import crud +from app import deps +from app.config import INDEX_DIR +from app.schemas.chatbot import ChatInput +from app.schemas.podcast import Episode + +# Project Directories +ROOT: Path = Path(__file__).resolve().parent.parent +BASE_PATH: Path = Path(__file__).resolve().parent +TEMPLATES = Jinja2Templates(directory=str(BASE_PATH / "templates")) + +# Global index storage +INDEX: Dict[str, Any] = {} + +async def load_rag_index(index_dir: Path) -> BaseIndex: + storage_context = StorageContext.from_defaults(persist_dir=index_dir) + embed_model = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1") + return load_index_from_storage(storage_context, embed_model=embed_model) + + +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: + INDEX['rag_index'] = await load_rag_index(index_dir=INDEX_DIR) + yield + INDEX.clear() + +app = FastAPI(title="Podcast Summarizer", lifespan=lifespan) +api_router = APIRouter() + +@api_router.get("/", status_code=200) +async def root( + request: Request, + db: AsyncSession = Depends(deps.get_db), +) -> Any: + """ + Serves the homepage with a list of podcast episodes. + + Args: + request (Request): The request object. + db (AsyncSession): Database session dependency. + + Returns: + Any: A template response rendering the homepage. + """ + episodes = await crud.episode.get_multi(db=db, limit=10) + return TEMPLATES.TemplateResponse("index.html", {"request": request, "episodes": episodes}) + +@api_router.get("/episode/{episode_id}", status_code=200, response_model=Episode) +async def fetch_episode( + *, + episode_id: int, + db: AsyncSession = Depends(deps.get_db), +) -> Any: + """ + Fetches a single episode by its ID. + + Args: + episode_id (int): The unique identifier of the episode. + db (AsyncSession): Database session dependency. 
+ + Returns: + Any: The episode data or an HTTP 404 error if not found. + """ + result = await crud.episode.get(db=db, id=episode_id) + if not result: + raise HTTPException(status_code=404, detail=f"Episode with ID {episode_id} not found") + return result + +@api_router.post("/inference/stream/", status_code=200, response_model=str) +async def run_chat_inference_stream( + chat_input: ChatInput, + llm: OpenAILike = Depends(deps.get_llm), +) -> Any: + index: VectorStoreIndex = INDEX["rag_index"] + query_engine: BaseQueryEngine = index.as_query_engine(streaming=True, llm=llm) + response = query_engine.query(chat_input.user_message) + return StreamingResponse(response.response_gen, media_type="text/event-stream") + +@api_router.get("/chatbot", status_code=200) +async def ui(request: Request) -> Any: + return TEMPLATES.TemplateResponse("chatbot.html", {"request": request}) + +app.include_router(api_router) + +if __name__ == "__main__": + # Use this for debugging purposes only + import uvicorn + uvicorn.run("app.main:app", host="0.0.0.0", port=8001, log_level="debug", reload=True) diff --git a/project_rag/app/models/__init__.py b/project_rag/app/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/project_rag/app/models/podcast.py b/project_rag/app/models/podcast.py new file mode 100644 index 0000000..c881137 --- /dev/null +++ b/project_rag/app/models/podcast.py @@ -0,0 +1,27 @@ +from app.db.base_class import Base +from sqlalchemy import Column, Integer, String, ForeignKey, Text +from sqlalchemy.orm import relationship + + +class Podcast(Base): + __tablename__ = 'podcast' + id = Column(Integer, primary_key=True) + name = Column(String(255), nullable=False) + episodes = relationship("Episode", back_populates="podcast") + +class Episode(Base): + __tablename__ = 'episode' + id = Column(Integer, primary_key=True) + title = Column(String(255), nullable=False) + url = Column(String(512)) + podcast_id = Column(Integer, ForeignKey('podcast.id'), 
nullable=False) + podcast = relationship("Podcast", back_populates="episodes") + summary = relationship("Summary", back_populates="episode", uselist=False) + transcript = Column(Text, nullable=True) + +class Summary(Base): + __tablename__ = 'summary' + id = Column(Integer, primary_key=True) + content = Column(Text, nullable=True) + episode_id = Column(Integer, ForeignKey('episode.id'), unique=True) + episode = relationship("Episode", back_populates="summary") diff --git a/project_rag/app/schemas/__init__.py b/project_rag/app/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/project_rag/app/schemas/chatbot.py b/project_rag/app/schemas/chatbot.py new file mode 100644 index 0000000..b62bcde --- /dev/null +++ b/project_rag/app/schemas/chatbot.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel + + +class ChatInput(BaseModel): + """ + Represents the input for the chat inference endpoint. + + Attributes: + user_message (str): The user's message to the AI model. + """ + + user_message: str = "Tell me about Developer Tea" \ No newline at end of file diff --git a/project_rag/app/schemas/podcast.py b/project_rag/app/schemas/podcast.py new file mode 100644 index 0000000..ce0bab1 --- /dev/null +++ b/project_rag/app/schemas/podcast.py @@ -0,0 +1,56 @@ +# app/schemas/podcast.py +from pydantic import BaseModel + +class PodcastBase(BaseModel): + title: str + +class PodcastCreate(PodcastBase): + pass + +class PodcastUpdate(PodcastBase): + pass + +class PodcastInDBBase(PodcastBase): + id: int + + class Config: + from_attributes = True + + +class EpisodeBase(BaseModel): + title: str + url: str + podcast_id: int + +class EpisodeCreate(EpisodeBase): + pass + +class EpisodeUpdate(EpisodeBase): + pass + +class EpisodeInDBBase(EpisodeBase): + id: int + + class Config: + from_attributes = True + + +# Properties to return to client +class Episode(EpisodeInDBBase): + pass + +class SummaryBase(BaseModel): + content: str + episode_id: int + +class 
SummaryCreate(SummaryBase): + pass + +class SummaryUpdate(SummaryBase): + pass + +class SummaryInDBBase(SummaryBase): + id: int + + class Config: + from_attributes = True diff --git a/project_rag/app/templates/chatbot.html b/project_rag/app/templates/chatbot.html new file mode 100644 index 0000000..c91eff2 --- /dev/null +++ b/project_rag/app/templates/chatbot.html @@ -0,0 +1,87 @@ + + + + + + Chat Interface + + + + +
+

Generic Chatbot

+

Batch vs. Stream...

+
+ +
+
+
+

Streaming Chat

+
+ + +
+

Stream Response:

+
+
+
+ + + + + + diff --git a/project_rag/app/templates/index.html b/project_rag/app/templates/index.html new file mode 100644 index 0000000..c15dad4 --- /dev/null +++ b/project_rag/app/templates/index.html @@ -0,0 +1,63 @@ + + + + + + + Podcast Summaries + + +
+
+
+

+ tl;dl - Too Long, Didn't Listen. Podcast Summaries. +

+

+ Developer Tea Episode Summaries... +

+
+ {% for episode in episodes %} +
+
+

+ {{ episode.title }} +

+ {% if episode.summary %} +

+ {{ episode.summary.content['summary'] | default('No summary available', true) }} +

+

+ "{{ episode.summary.content['quote'] | default('No quote available', true) }}" +

+ {% else %} +

+ No summary available. +

+

+ No quote available. +

+ {% endif %} +
+
+ +
+
+ {% endfor %} +
+ + +
+ + \ No newline at end of file diff --git a/project_rag/app/update_data.py b/project_rag/app/update_data.py new file mode 100644 index 0000000..1733ed2 --- /dev/null +++ b/project_rag/app/update_data.py @@ -0,0 +1,23 @@ +import asyncio +import logging + +from app.db.update_db import update_db +from app.db.session import AsyncSessionLocal + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +async def update() -> None: + async with AsyncSessionLocal() as db: + await update_db(db) + + +async def main() -> None: + logger.info("Adding summary data") + await update() + logger.info("Summary data added") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/project_rag/data/transcripts/9_years_persistence_by_reducing_expectation.txt b/project_rag/data/transcripts/9_years_persistence_by_reducing_expectation.txt new file mode 100644 index 0000000..25a3145 --- /dev/null +++ b/project_rag/data/transcripts/9_years_persistence_by_reducing_expectation.txt @@ -0,0 +1,186 @@ +it's a little difficult for me to believe that it's been nine years as of the day that this +podcast this episode releases it's been nine years since this podcast started and i'm so grateful +for everything that i've learned on this show by the way i've got a little bit of a cold you might +be hearing that but i couldn't let this particular date pass without releasing an episode cold or not +this podcast has meant so much to me uh over the last nine years don't worry i'm not getting ready +to announce that we're canceling or anything like that i just want to take a moment to reflect +and say thank you it's very rare that i talk about myself my personal feelings on this show +at least anything deeper than my opinions but i'm very fulfilled by the opportunity to do this +podcast and i know the years it has changed and it will continue to change just like i will continue +to change and that's actually what we're going to talk about after this reflection this moment 
+of reflection for me we're going to talk about how you can configure your brain +for change to invite change but i do want to take a moment and just show my appreciation for all of +you who are listening right now maybe this is the first time you're listening maybe you've been +listening for nine years maybe you followed us all the way through when we were a part of the +spec network maybe you've been with us since the very beginning in our earliest sponsored days +or throughout all of the many interviews that we've done with the awesome guests whatever +your journey with developer t has been +i just want to say that i sincerely appreciate that you're spending a few moments of your life +listening to me talk on this podcast we may never meet in person some of you i have met +in person or i've met you virtually online but this is such a meaningful thing to me it's become +a part of my kind of routine my self-reflection routine so many times something will happen in +my personal life i'll have some experience or i'll have some insight while i'm just +hanging out with my family or when i'm exercising or if i'm participating in one of my ridiculously +long list of uh of hobbies that i have and this show is always in the back of my mind if i have +an epiphany that i feel like is worth sharing uh it happens in all parts of my life and this truly +is an extension of who i am so thank you so much uh genuinely from the bottom of my heart +i've enjoyed doing this so much and i can't wait to do more of it +i have been asked many times uh either implicitly or explicitly how i've done this show for so long +how can i keep going for so long and the truth is that i don't feel like i've done the same thing +for nine years most of the people who listen to the show you might uh perceive that a little bit +differently because the show in large part when it actually gets delivered uh when you're when +you're hearing the show you're still hearing the same +true music as we started 
with nine years ago right so there are uh not a not a ton has changed about +this me standing in front of a microphone and sharing my opinions sharing insights from my +experience in the industry uh you know interviewing other people all of these format things have not +changed drastically we have had some shifts in topics etc but the way that i interact with +what i'm doing has changed significantly there have been scheduled changes there have been +patterns of change there have been changes in the way that i interact with what i'm doing +there have been patterns of change there have been changes in the way that i interact with what i'm doing +there have been patterns in my life that i've been able to adjust so that the output of the show +doesn't really necessarily have to change very much but everything surrounding it can change +pretty drastically especially my own expectations this is probably the critical factor and the one +i want to hone in on in this discussion today my expectations for what this show could or should be +have changed on a very regular basis over the course of my life +the last nine years and in some ways that affects what i do with the show in terms of the types of +content that i might produce or the kinds of guests i might have on but the biggest thing +that it allows me to do is continue regardless of what happens this is the key to persistence at +least in this particular case the key to persistence for me and i suspect that the key to persistence +for you may look very similar adjusting your own expectations or a better way to put it is +reducing the power of your expectations reducing the power of your expectations +now this sounds like i'm going to go into some kind of like buddhist teaching or something here +but that's not really the intention the intention is to identify areas where you have an expectation +of what will occur +whether it's with a podcast or with your code or with your career with your family or friends +looking 
at those expectations and holding them loosely because here's what can happen +if we build up an expectation if we build up an expectation we begin to wrap our actions +in our thoughts around that expectation and instead of thinking about the thing that we're +doing in that moment we're going to be thinking about the thing that we're doing in that moment +instead of thinking about say the process instead of thinking about writing that particular piece +of code with quality in mind we start to think about some expectation and by the way expectation +doesn't necessarily have to be some irrational you know load that you're putting on a situation +it can be totally rational your expectation of what should happen what is likely to happen what +could happen all of these things are going to be going to be going to be going to be going to be +these types of expectations we bring them to the table we begin to wrap all of what we do around +them and in fact the purpose for the thing that we're doing becomes the expectation +think about this as it plays out in your normal life or in your life as a software engineer +let's imagine that you are working your job as a associate software engineer whatever your +entry-level position or mid-level position software engineer and in no uncertainty +certain terms you really love your job you enjoy your day-to-day work you enjoy the people you work +with you enjoy the kind of problems that you're solving you feel sufficiently challenged you feel +excited but that little expectation starts to grow inside of you the expectation of getting +a promotion of achieving a particular title or maybe even a salary your work continues you keep +on enjoying every day that you walk into work but now the expectation is kind of +living alongside that you know that reviews are around the corner you're going to be either up +for promotion or not you talk to your manager they put you up for promotion and you get a rejection +now this expectation that you 
had has created a differential it's created a picture of something +that you ostensibly thought would be good for you or would be desirable for you this fork in the road +but it's +all a fiction it never actually happened it's just a comparison point that you've created +uh in your own mind it's all a fiction and so now you're in some kind of suffering +uh you're experiencing a negative feeling you're experiencing frustration or or maybe you're +feeling a sense of imposter syndrome maybe you begin to develop resentment towards your manager +or resentment towards your manager or resentment towards your manager or resentment towards your +people who uh ultimately made the decision not to promote you and what otherwise was an excellent +job that you were enjoying yourself you could have continued on the promotion had you not developed +an expectation a promotion would be a bonus right something that you appreciate instead your +expectation turned it into something that you felt like you lost now let's think about this for a +second you lost your expectation you lost your expectation you lost your expectation you lost your +expectation you lost your expectation you obviously didn't lose +anything but because you've trained your mind into thinking about an expectation +your mind is an incredibly powerful thing right you've developed this very thorough story about +yourself in a future world where that expectation has been met and now you're living in a world where +that expectation has not been met and because your brain can't really tell the difference between +imagination and reality you're not able to tell the difference between imagination and reality +well you actually feel a sense of loss when i first started developer t i actually had the +opposite experience i didn't really know what was going to happen with the podcast i was very +fortunate i was uh very lucky that the podcast gained the traction that it did and i had the +opposite experience it it was my 
expectation that i would continue working on the podcast for years +before it ever gained traction or something like that uh but it gained traction very early and so +something broke my expectation and i was like oh my god i'm gonna lose my mind i'm gonna lose my +positive way but then as the years went on the previous performance of the podcast i thought i +was having a rational response as i was looking at the numbers they can look at the listenership +for example and if one episode dipped below another episode i realized that there was a +differential between my expectation of how that podcast episode should have performed and how it +and now i'm trying to rationalize in my mind the reason for the differential just like you might do +that if you didn't get the promotion well why didn't i get the promotion maybe i'm not good +enough for the promotion maybe i screwed something up maybe i said something wrong maybe that +particular project i could have worked extra hours on and maybe that would have impressed +my boss's boss's boss all of these things are born out of the differential between reality and +our expectation and so i would spend +time trying to analyze what kind of content is it that has a better listenership than other content +what kind of seasonality should i expect there's all of these things that i was trying to calibrate +my expectations i was trying to change the way that i would expect so i could be more accurate +with my expectations in the future and this is again a rational thing to do we want to be able +to predict the future it's one of the things that we really want to do as humans because predicting +the future allows us to reduce risk and +also take advantage of benefits that we can see coming down the road so predicting the future is +a natural tendency to want to uh try to you know evaluate our expectations and tune them so that +they're better but what i realized is that all of that tuning was not really the core of why i +started the 
show to begin with i didn't start this show as crazy as it seems uh purely for the +pursuit of making the show as crazy as it seems i didn't start the show as crazy as it seems +podcasts are not the money-making tree that you might expect that they are i've been fortunate +enough to make some money with this podcast you know that because you've heard episodes in the +past where i've had sponsors but i didn't start this with that in mind and so the listenership +of the show started to compete for the core reason i started this podcast in the first place +it took me a while to find the tagline that you all probably have heard +but to help developers find clarity perspective and purpose is my core reason now if i can have +if i can help more developers do that then even better that's great but what i failed to do was +recognize that the number of engineers listening to the show the number of uh you know audience +per episode the number of unique downloads i was starting to tune myself towards that number +rather than allowing that number to be an outcome that i don't have any control over +i was trying to control that outcome i was trying to align my expectations and correct +my process to improve the outcomes towards my expectations sometimes this is a reasonable +thing to do sometimes you should do this but in this case i was losing the plot i was losing the +core reason why i started the show in the first place so one of the things that i'm doing i've +been doing it for a couple of years i've been doing it for a couple of years i've been doing it for a couple of years +now is i'm actively trying to reduce my expectations of developer t and in other areas of my life +reduce the uh the uh the leverage that my expectations have over my life because the +truth is in that situation where you didn't get that promotion that you thought you were going to +get it's very tempting to say you know what that's it i'm throwing in the towel i'm leaving this +company i'm going to 
go somewhere that actually respects me and i'm going to go somewhere that +me you know respects my my experience and it's going to give me the title that i want and this +is how uh your expectations can stop you from continuing something that you really love doing +you really love that job but you decided to stop it because your expectations created a differential +between your reality which by the way you really enjoyed and some other reality that never was true +when listenership has waxed +and waned over the years of this show i could have thrown in the towel i could have said you +know what i can't attract the same number of listeners i can't attract the same kinds of +sponsors anymore because my listenership is down and therefore it's not worth doing my expectations +of this show uh have have failed in that case right i i expected to have x number of listeners +therefore i could get y number of sponsors and i've had less than x therefore the show +is over and i'm not going to be able to do it again i'm not going to be able to do it again +no longer worth doing this again is losing the plot right so evaluate those expectations i really +encourage you to take a few minutes uh whether you're doing it now or later um write down some +areas of your life where you have maybe in the past year upon reflection you've had some +expectations that you failed to meet whether it's expectations of yourself maybe it's expectations +of other people something that was significant +and try to imagine what it would have been like had you not developed expectations it's very hard +to do by the way don't don't get me wrong this is not something that's it's easy to do but when we +develop expectations and then we invest in those expectations that's really where we go wrong +expectations on their own are kind of a natural byproduct of our thinking brain we have some +expectation of what will happen but when we start to invest in that expectation we're not going to +when we start to 
plan on it we start making changes based on it that's where things can get +a little bit wonky now briefly i want to make sure that you understand i'm not talking about +creating plans plans are these longer term expectations we have of what we might do in +the future instead think about this as the meta layer for example i expect that i will fulfill +all of my plans +this is a meta expectation that i have of what's going to happen +with my planning process my planning is going to be perfect that would be an expectation +the plan itself is just what you think about doing what you're planning to do is not necessarily an +expectation of how things are going to go probably the easiest line to draw in the sand is expectations +are about outcomes that you can't control while plans are more like inputs that you can control +our expectations are about what you think about doing what you're planning to do and what you're +doing Dent convert which can quickly your expectations reflect what we believe about the efficacy of our +planning but our expectations also mask our inability to recognize all of the many factors +that may impact outcomes beyond our control ultimately i've been incredibly happy to do +this show and i expect to continue being happy doing this show uh and the way that that that +I ensure that that will happen is that I focus on that core reason why I started doing the show in +the first place. And if that disappears, then I will. I will shut the show down, but I don't see +that disappearing anytime soon. And that's because of all of you. Thanks so much for listening to +today's episode of Developer Tea. If you'd like to discuss this episode, or if you'd like to just +chat, come and join us in the Developer Tea Discord community. That's developertea.com +slash discord. That's free. It always will be free. There's tons of other community members +in that Discord. Come and check it out, developertea.com slash discord. I'm in there. 
+You could send me a message, ask me a question, ask me for advice. I'm there to chat as are other +members of the community. Thanks so much for listening. And until next time, enjoy your tea. diff --git a/project_rag/data/transcripts/developer_tea_episode_1191.txt b/project_rag/data/transcripts/developer_tea_episode_1191.txt new file mode 100644 index 0000000..6dd76ad --- /dev/null +++ b/project_rag/data/transcripts/developer_tea_episode_1191.txt @@ -0,0 +1,3 @@ +A large portion of the people who listen to this show are very early in +their career. And it makes sense, right? Not everybody is going to try out +software development and stick with it. Now I hope that the people who listen to this show, unless you really don't enjoy it, I really hope that you find the fortitude and to go back to a previous episode, the grit to stick with software development even when it gets difficult. But there's another problem that's facing developers, particularly young developers who haven't really gained a lot of experience in any industry, not only development, but any industry. And this also faces developers who are not necessarily young. You've had other parts of your career and you do have experience, but you don't have much as a developer. You're listening to Developer Tea. My name is Jonathan Cutrell. And my job here is to help you become a better developer. I do that by coaching you through the hard parts of being a developer. Now certainly sometimes you're going to face code complications, right? You're going to face difficult problems that you have to solve in code. But by and large, the hardest part of your career on average is not going to be the code. Most of the code that we write, we generally know how to write code decently well. Now the audience that I just now pointed this episode at, you're still learning how to write code. But even for you, most of your problems, most of the hard stuff, the stuff that differentiates a OK developer from a great developer. 
And I think you want to be a great developer. The stuff that differentiates those two people, generally speaking is not only going to be related to your code. Now you can't ignore your code. You can't ignore the product that you're building. But you absolutely can't ignore the things that we're talking about in this developer career roadmap series, the traits of a great developer. And today is no exception. Here's the reality. Every great developer had a starting point and every great developer at some point in their lives had zero experience. They were introduced to software development. They were introduced to, you know, a front end web development. They were introduced to CSS for the first time. At some point they wrote poor code. At some point they were really bad at what they were doing. And this is a very common thing that helps young developers in really beginners in all spheres, not just in development. It helps us kind of gain perspective that everybody was a beginner at one point. But we want to talk about not just the fact that everybody was really bad at one point. That's not enough. We need to understand what was the track that those beginners got on in order to become successful. And that's what we're trying to uncover on this show all the time. We want to understand what helps you both become a lifelong learner, right? Become a better developer every single day. But also what helps you excel beyond someone else who has the same technical skills as you, right? Technical skill is not all there is to the story. And that's really the moral of that whole intro is that, you know, you as a software developer, whether you're a beginner or if you're very experienced, your technical skill is only part of the story. So if you are very young in your career, what is a trait that can help you overcome the boundary of a lack of experience? And perhaps we should talk for a moment about why experience is a good thing. 
Certainly experience provides you with perspective, right? It gives you some information to draw on from the past to inform your current decision making. But unfortunately, experience is not something that you can just kind of snap your fingers and decide to have. It's not a trait that you can develop. It is only something that comes as a result of time and effort. And unfortunately, no matter what you do, we all are essentially bound by the same kind of time constraints, right? We can try to make more time in our day. And there's that's one of the reasons why there's so many good podcast episodes out there about becoming more productive and making the most of your time. And I encourage you to take the time to invest in, you know, setting yourself up for success in that way, right? Eliminating waste and providing yourself with the time that you need to be able to work on your career. But ultimately, there is a ceiling there, right? The ceiling is that everyone has a rate limit that they can't lift of experience. Of course, experience is something that you will gain over time. And it's important that you set yourself up for valuable experiences, right? So if you are getting an internship, I recommend that you get an internship that challenges you, for example, but what can you do when you are inexperienced? And let's talk for a moment about what an inexperienced person lacks. They lack that context that an experienced person has. They may lack the perspective. They may not have the information that they need from historical experiences that they've gone through to be able to make the best decision. So how can you be successful when you don't have experience? That's what we're going to talk about right after we talk about today's sponsor, Fuse. For the most part, if you've been developing a native mobile applications for very long at all, then you know that the tool set is largely remaining static. 
It's been the same for quite a while on both of the major platforms and also the third major platform. You probably know which three I'm talking about. And that's what Fuse is looking to change. Fuse has unique features that include a cross-platform, component-based UI engine, and real-time workflow where every change you make to the code is instantly reflected on your devices, and in the desktop simulator that comes with Fuse. It runs on Mac OS, but it also runs on Windows and it lets you make a real native apps for iOS and for Android. If Fuse installers include everything you need to get started and there's no complicated setup process. Now if you've actually worked in game development, if you used a program called Unity, I actually used this when I was in school, then that's kind of for the game sphere. This is essentially like Unity for app development. Fuse is officially at 1.0 status and they're no longer in beta, which is a big deal. This means that you can start using this stuff to build production products. They have Fuse Studio, which is the new premium editor in Workspace for working with Fuse projects, and this is included in the professional plan. And of course, that introduces their paid professional plan. This is the Fuse built UI kit that comes with this camera components, premium charting, lots of stuff that comes with this paid plan, but most people don't even need the paid plan. So I encourage you to go and check out what Fuse provides for free to you, totally free, by going to fuse tools.com slash plans. Now if you do decide that you want to try out that professional plan, then it's going to be 70% off for you as a developer, T-listener. If you use the promo code DT, I want you to catch that. 70% off for 12 months worth of Fuse premium. And you can go and check out what Fuse has to offer by going to fuse tools.com slash plan. Remember, 70% off for 12 months if you use the code DT. And those codes have to be redeemed by December 31st of 2017. 
So if you're listening this episode in the future, then it's past that date, then unfortunately this code is probably expired. But most of you are going to be listening to this before that date. So I encourage you to go and check it out once again, fuse tools.com slash plans. Thanks again to Fuse for sponsoring today's episode of Developer Tea. So how can a developer with little to no experience, someone who is learning to code and they're ready to get started, ready to look for a job, they're ready to look for an internship. You know, we talk about the value of internships in the developer career roadmap. And you've identified some places that you want to work for. Maybe you've identified in a places that are going to challenge you and give you kind of a path for growth, but you have zero experience. And ultimately what that means is that you can't really have confidence. And this is the key word that I want you to kind of wrap your mind around for a second confidence is the result of experience. Once you start gaining experience, that is how you gain confidence because confidence is informed by those experiences. You have confidence because you believe in your own abilities to be able to solve problems. And the only way you can believe in your own abilities to solve problems is if you actually have experience solving problems. Now, I want to be very clear here. I want to draw a line, a distinct line. There's a very very big difference between believing that you are capable in the future, believing that you are capable of accomplishing something and having confidence that you can accomplish something. Believing that you're capable of accomplishing something can not be informed by any experience at all. You could be wrong or you could be right. And you believe in your own abilities and you believe in the raw materials and that first experience, you believe you have what it takes for that first experience to be successful. 
That's not the kind of confidence or belief that we're talking about in this episode in order for you to have confidence in the way that we're talking about it here, you need that experience. So how do you operate without experience? We've been asking this over and over. And the reality is you must operate through bravery. Bravery is kind of a difficult word to say on a development podcast because so much of what we do is centered around being able to validate what we're doing. And unfortunately, bravery is not one of those things that's easily validated. Bravery is the gumption and the energy combined to actually go and do something that you've never done before. Not in a foolish way, but in a way that is accepting of potential failure. Let me say that again. Bravery is the gumption and the energy to go and do something that you've never done before, allowing for failure for the sake of experience and learning. This is such a big key to success. And it's something that really we could talk about for a hundred episodes and highly focused on this concept of bravery. If learning and focus are two pillars of what it takes to be a good developer, two pillars of what this show is about, really, then bravery is that third pillar. Being brave means identifying the reality that the only way that you're going to be able to gain experience, the only way you're going to get better as if you actually go through the process of doing something. And unfortunately, so many developers, potential developers careers are cut short because they lack in this particular trait. Bravery allows for failure. Now, that doesn't mean that you walk in again, foolishly. It means you walk in prepared. For example, it's much easier to act out of bravery if you have, you know, a emergency fund saved up. So if you decide to leave your job one day and you think that that looks like bravery and in fact, all it means is you can't pay your bills next month, then that's problematic. 
You're allowing for failure, but in a catastrophic way. Instead, preparing for small failures and setting yourself up for a continued and iterative process, an iterative practice of bravery, that's key. That is the trait that we're talking about. Giving yourself the space to do something that you've never done before with the hope in the expectation that you will get better at doing that thing. You're going to gain experience, and eventually you're going to generate confidence from those experiences. But first, on day one, you have to act out of bravery. Thank you so much for listening to today's episode of Developer Tea. This is the last in this series of developer career road map traits of a great developer. You know, we've done quite a few of these episodes. We're going to do a recap episode in the next episode to kind of go back and talk about how these these various traits kind of interact with each other. And hopefully you're gaining value out of these traits. And I'd hope that you will pass this stuff around to people that you think could benefit from it. These developer career road map episodes are not intended to be something that you listen to once you never return to. This is kind of the key volumes if you will of Developer Tea that we can point back to and come back and say, yes, this is this is still a trait that is required for a great developer. Thank you so much again for listening. Thank you again to fuse for sponsoring today's episode of Developer Tea. If you are a mobile application developer or maybe you aren't a mobile application developer and you want to act in a brave way, you want to actually go and build a mobile application for the first time. Fuse may be the perfect platform for you to start on rather than going to some of these antiquated tools. These things that have been around for a long time and really have remained unchanged. Instead go and check out fuse. FuseTools.com slash plans. 
Remember the code dt if you want to get 70% off the paid plan for 12 months. And that code has to be redeemed by December 31st of 2017. Thanks again to fuse. Thank you so much for listening. If you don't want to miss out on future episodes of Developer Tea, we have interviews. We have these kind of focused episodes. Then make sure you subscribe in whatever podcasting app you're using right now. Thank you so much for listening and until next time, enjoy your tea. \ No newline at end of file diff --git a/project_rag/data_engineering/models/.gitkeep b/project_rag/data_engineering/models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/project_rag/data_engineering/prompts/summarize_podcast_transcript.md.j2 b/project_rag/data_engineering/prompts/summarize_podcast_transcript.md.j2 new file mode 100644 index 0000000..5e5dbf7 --- /dev/null +++ b/project_rag/data_engineering/prompts/summarize_podcast_transcript.md.j2 @@ -0,0 +1,13 @@ +You are responsible for summarizing podcast transcripts. +When the user provides a podcast transcript, summarize the key points +from the transcript into a concise summary of 100-200 words. + +*** Further Instructions *** +Ensure that you think through how to populate each field step by step. + +- Refrain from making assumptions about details the user hasn't provided. +- Make sure the quote is relevant. The quote +should be only from the interview subject, *not the interviewer*. +- In case of missing data, mark the field as "na". +- Do not print any parts of the JSON schema in the response, e.g. BAD: `{"type":"object","properties":{"summary": ...` vs. GOOD: `{"summary": ... ` +- Return the template as valid JSON. Only include the JSON in the response, no other comments. 
# ---- file: project_rag/data_engineering/rag_index_generator.py (new file) ----
"""Offline job: build (or reload) the persisted RAG vector index from podcast transcripts."""
import glob
from pathlib import Path

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.indices.base import BaseIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from app.config import TRANSCRIPT_DIR

# The index is persisted under the online app's directory so the FastAPI
# service can load it at startup.
BASE_DIR: Path = Path(__file__).resolve().parent.parent
INDEX_DIR: Path = BASE_DIR / "app" / "index_store"


def load_embedding_model() -> HuggingFaceEmbedding:
    """
    Load and return a HuggingFaceEmbedding model configured with a predefined model name.

    Returns:
        HuggingFaceEmbedding: The loaded embedding model instance.
    """
    return HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")


def generate_rag_index(index_dir: Path, embed_model: HuggingFaceEmbedding) -> BaseIndex:
    """
    Generate or load a vector store index in ``index_dir`` using ``embed_model``.

    If an index has already been persisted in the directory it is loaded;
    otherwise a new index is built from the transcript files and persisted.

    Args:
        index_dir (Path): Directory where the index is stored or will be stored.
        embed_model (HuggingFaceEmbedding): Embedding model used for document encoding.

    Returns:
        BaseIndex: The loaded or newly generated vector store index.
    """
    # iterdir() raises FileNotFoundError if the directory is missing (e.g. a
    # fresh checkout where .gitignore excludes the persisted JSON), so make
    # sure it exists first.
    index_dir.mkdir(parents=True, exist_ok=True)
    # Any file other than the .gitkeep placeholder means an index was persisted.
    index_exists = any(item.name != ".gitkeep" for item in index_dir.iterdir())
    if index_exists:
        storage_context = StorageContext.from_defaults(persist_dir=str(index_dir))
        return load_index_from_storage(storage_context=storage_context, embed_model=embed_model)

    transcript_files = glob.glob(str(TRANSCRIPT_DIR / "*.txt"))
    documents = SimpleDirectoryReader(input_files=transcript_files).load_data()

    index: VectorStoreIndex = VectorStoreIndex.from_documents(
        documents, embed_model=embed_model, show_progress=True
    )
    index.storage_context.persist(persist_dir=str(index_dir))
    return index


def main() -> None:
    """
    Main function to load the embedding model and generate or load the RAG index.
    """
    embedding_model: HuggingFaceEmbedding = load_embedding_model()
    generate_rag_index(embed_model=embedding_model, index_dir=INDEX_DIR)


if __name__ == "__main__":
    main()


# ---- file: project_rag/data_engineering/summarize_transcript.py (new file; continues in next chunk) ----
import json
from pathlib import Path
from typing import Iterator
import argparse
import logging

from llama_cpp import (
    Llama,
    CreateChatCompletionResponse,
    CreateChatCompletionStreamResponse,
    LlamaGrammar,
)

from app.config import MODEL_DIR, PROMPT_DIR, TRANSCRIPT_DIR, SUMMARY_DIR
from app.config import settings

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def load_system_prompt() -> str:
    """Return the summarization system-prompt template text."""
    with open(PROMPT_DIR / "summarize_podcast_transcript.md.j2", "r") as file:
        return file.read()


def prepare_user_prompt(transcript_path: Path) -> str:
    """Read the transcript at ``transcript_path`` and return its full text as the user prompt."""
    with open(transcript_path, "r", encoding='utf-8') as file:
        transcript = file.read()
    return transcript
+def load_model(model_path: Path) -> Llama: + return Llama( + model_path=str(model_path), + n_ctx=settings.llm.CONTEXT_WINDOW, + n_gpu_layers=settings.llm.N_GPU_LAYERS, + chat_format=settings.llm.CHAT_FORMAT + ) + +def prepare_output( + llm: Llama, + user_prompt: str, + system_prompt: str +) -> CreateChatCompletionResponse | Iterator[CreateChatCompletionStreamResponse]: + result = llm.create_chat_completion( + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + max_tokens=settings.llm.MAX_TOKENS, + stop=[], + temperature=settings.llm.TEMPERATURE, + response_format={ + "type": "json_object", + "schema": { + "type": "object", + "properties": { + "summary": { + "type": "string", + "minLength": 200, + "maxLength": 300, + "description": "A brief summary of the interview content." + }, + "quote": { + "type": "string", + "description": "A quote from the interview subject that captures a key theme of the podcast." + }, + "interview_date": { + "type": "string", + "format": "date", + "description": "The date when the interview was conducted." 
+ } + }, + "required": [ + "summary", + "interview_date", + ] + } + } + ) + return result['choices'][0]['message']['content'] + +def summarize_transcript(transcript_path: Path, llm: Llama) -> None: + system_prompt = load_system_prompt() + user_prompt = prepare_user_prompt(transcript_path=transcript_path) + return prepare_output( + llm=llm, + system_prompt=system_prompt, + user_prompt=user_prompt + ) + +def write_summary_to_file(summary: str, transcript_file_name: Path): + summary_file_name = f"{transcript_file_name.stem}_summary.json" + summary_path = SUMMARY_DIR / summary_file_name + with open(summary_path, "w") as file: + file.write(summary) + logger.info(f"Summary written to {summary_path}") + +def run_summary_pipeline(model: str, transcript_file_name: str) -> None: + logger.info(f"Loading model: {model}") + llm = load_model(model_path=MODEL_DIR / model) + logger.info("Loaded LLM") + + transcript_path = TRANSCRIPT_DIR / transcript_file_name + logger.info(f'Summarizing transcript: {transcript_file_name}') + summary = summarize_transcript(transcript_path=transcript_path, llm=llm) + logger.info(f'Summary prepared: {summary}') + + write_summary_to_file(summary, Path(transcript_file_name)) + logger.info(f'Summary written to file') + + +if __name__ == "__main__": + # Set up command-line argument parsing (you could also use Typer: https://github.com/tiangolo/typer) + parser = argparse.ArgumentParser(description='Run the transcript summarization pipeline.') + parser.add_argument('--model', '-m', help='local llm', default=settings.llm.MODEL_FILE_NAME) + parser.add_argument('--transcript-file', '-t', help='Transcript file name', required=True) + + args = parser.parse_args() + + run_summary_pipeline(model=args.model, transcript_file_name=args.transcript_file) diff --git a/project_rag/prestart.py b/project_rag/prestart.py new file mode 100644 index 0000000..374f5bd --- /dev/null +++ b/project_rag/prestart.py @@ -0,0 +1,15 @@ +import subprocess +import sys + +from 
alembic.config import Config +from alembic import command + +from app.main import ROOT + + +alembic_cfg = Config(ROOT / "alembic.ini") + +subprocess.run([sys.executable, "./app/db/backend_pre_start.py"]) +command.upgrade(alembic_cfg, "head") +subprocess.run([sys.executable, "./app/initial_data.py"]) + diff --git a/project_rag/requirements-local.txt b/project_rag/requirements-local.txt new file mode 100644 index 0000000..2c84585 --- /dev/null +++ b/project_rag/requirements-local.txt @@ -0,0 +1 @@ +llama-cpp-python>=0.2.44,<0.3.0 diff --git a/project_rag/requirements.txt b/project_rag/requirements.txt new file mode 100644 index 0000000..319caee --- /dev/null +++ b/project_rag/requirements.txt @@ -0,0 +1,14 @@ +fastapi>=0.109.2,<1.0.0 +uvicorn>=0.27.1,<1.0.0 +sqlalchemy>=2.0.0,<3.0.0 +jinja2>=3.1.3,<4.0.0 +alembic>=1.13.1,<2.0.0 +aiosqlite>=0.19.0,<1.0.0 +pydantic-settings>=2.2.0,<3.0.0 + +# New requirements +llama-index>=0.10.0,<0.11.0 +llama-index-embeddings-huggingface>=0.1.4,<0.2.0 +llama-index-llms-llama-cpp>=0.1.3,<0.2.0 +llama-index-llms-together>=0.1.3,<0.2.0 +python-dotenv \ No newline at end of file