diff --git a/.gitignore b/.gitignore
index 7b6caf3..1c1a859 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,5 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
+
+/dev-compose.yml
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..0f61e30
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,6 @@
+FROM python:3.12
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple
+COPY . .
+CMD ["/bin/bash","-c","./run.sh"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 7ebd5ad..c9ba450 100755
--- a/README.md
+++ b/README.md
@@ -1,36 +1,43 @@
 # Streaming Autogen with FastAPI
 This is an example FastAPI server that streams messages from the Autogen framework
 
-## Installation
+## Installation & Running
+Clone the repo, set your LLM model and API key in `docker-compose.yml`, then build and start the Docker image.
 ```sh
-git clone https://github.com/LineaLabs/autogen-fastapi.git
+git clone https://github.com/0000sir/autogen-fastapi.git
 cd autogen-fastapi
-conda create -n autogen python=3.10
-conda activate autogen
-pip install -r requirements.txt
+docker compose build
+docker compose up
 ```
-## Running the server
-Make sure to set `OPENAI_API_KEY` in your environment variables or in `.env` file. You can get an API key from https://platform.openai.com/account/api-keys
-```sh
-./run.sh
-```
-
-
-## Querying the server
+## Documentation
+Navigate to http://localhost:8000/autogen to see the docs.
 
-You can query the autogen agents using the following command:
-```sh
+## Call different agent groups with different model definitions
+We can define different agent groups for different purposes in a single JSON file (app/agent_configs.json).
+Once defined, an agent group can be called by passing its id as the `model` parameter.
+For example, given a definition like this:
+```json
+{
+  "article_writer": {
+    "name": "ArticleWriter",
+    # other parameters ...
+  }
+}
+```
+We can call this agent group with the following command; note the `model` parameter:
+```bash
 curl -X 'POST' \
   'http://localhost:8000/autogen/api/v1/chat/completions' \
   -H 'accept: application/json' \
   -H 'Content-Type: application/json' \
+  -H 'Authorization: Bearer 976707f2ab39ebee343034b4b33db6f9' \
   -d '{
-  "model": "model_name_v0.1",
+  "model": "article_writer",
   "messages": [
     {
       "role": "user",
-      "content": "Hey Mitch, can you start us off with a joke?"
+      "content": "写一篇向小学生介绍明朝历史的文章,涵盖明朝的建立、兴盛、衰落的过程,大约3000字。"
     }
   ],
   "temperature": 1,
@@ -40,8 +47,5 @@ curl -X 'POST' \
   "stream": true
 }'
 ```
-Note that you must provide the entire conversation history to the backend, as the server expects input in OpenAI format.
-
-## Documentation
-Navigate to http://localhost:8000/autogen to see the docs.
+For available models, please refer to the model list API [http://localhost:8000/autogen/api/v1/models](http://localhost:8000/autogen/api/v1/models).
\ No newline at end of file
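For reference, the curl example in the README can also be reproduced from Python. The sketch below is illustrative rather than part of the repo: it assumes the `requests` package, that the server is running locally, that `YOUR_AUTH_KEY` is one of the keys configured in `AUTH_KEYS`, and that the streamed chunks follow the usual OpenAI chat-completion-chunk layout (`choices[0].delta.content`).

```python
# Minimal streaming client for the endpoint shown in the README (sketch, not part of the repo).
import json
import requests

URL = "http://localhost:8000/autogen/api/v1/chat/completions"
API_KEY = "YOUR_AUTH_KEY"  # must be one of the keys listed in AUTH_KEYS

payload = {
    "model": "article_writer",
    "messages": [{"role": "user", "content": "Write a short article about the Ming dynasty."}],
    "temperature": 1,
    "stream": True,
}

with requests.post(URL, json=payload, headers={"Authorization": f"Bearer {API_KEY}"}, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        # Each SSE event is a line of the form "data: {...json chunk...}"
        if not line or not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data.strip() == "[DONE]":  # some OpenAI-style servers send a final sentinel
            break
        chunk = json.loads(data)
        delta = chunk["choices"][0].get("delta", {})
        print(delta.get("content") or "", end="", flush=True)
```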
diff --git a/app/agent_configs.json b/app/agent_configs.json
new file mode 100644
index 0000000..1fdbb45
--- /dev/null
+++ b/app/agent_configs.json
@@ -0,0 +1,24 @@
+{
+  "article_writer": {
+    "name": "ArticleWriter",
+    "description": "默认的文章生成器,先根据需求描述生成文章提纲,然后根据提纲生成文章。",
+    "agents": [
+      {
+        "type": "ConversableAgent",
+        "name": "writer",
+        "system_message": "根据editor提供的文章主题和大纲内容写作文章正文,正文应该是一段或多段文字,而不是大纲式列表,写作完成后将内容提交给editor检查,如果editor提出修改建议,则按要求修改,直到文章完成。",
+        "human_input_mode": "NEVER",
+        "code_execution_config": false,
+        "llm_config": {}
+      },
+      {
+        "type": "ConversableAgent",
+        "name": "editor",
+        "system_message": "分析并理解user提出的文章撰写要求,构思文章大纲和标题,然后通过多次对话依次将大纲的每个部分交给writer,要求writer根据大纲撰写该章节内容。writer写作完每一部分后你都要检查是否符合主题和字数要求,不符合要求的提出修改建议,如此依次重复直到全部章节写作完成。文章完成后将写作文章的全文汇总并包含在 标记中输出,然后结束对话并输出 TERMINATE",
+        "human_input_mode": "NEVER",
+        "code_execution_config": false,
+        "llm_config": {}
+      }
+    ]
+  }
+}
\ No newline at end of file
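Because `agent_configs.json` is loaded at import time (see `app/autogen_agents.py` below), a malformed entry only shows up when the container starts. A small validation script such as the following sketch can catch problems earlier; the file name and the exact key sets are assumptions based on the fields shown above, not something shipped with the repo.

```python
# check_agent_configs.py -- hypothetical helper, not part of the repo.
# Validates that every agent group in app/agent_configs.json has the fields
# that autogen_agents.build_agents() and the /models endpoint read.
import json
import sys

REQUIRED_GROUP_KEYS = {"name", "description", "agents"}
# "type" and "llm_config" also appear in the file, but build_agents() currently always
# constructs ConversableAgent and load_agent() overwrites llm_config, so they are optional here.
REQUIRED_AGENT_KEYS = {"name", "system_message", "human_input_mode", "code_execution_config"}

def validate(path="app/agent_configs.json"):
    with open(path, encoding="utf-8") as fh:
        configs = json.load(fh)
    errors = []
    for group_id, group in configs.items():
        missing = REQUIRED_GROUP_KEYS - group.keys()
        if missing:
            errors.append(f"group '{group_id}' is missing {sorted(missing)}")
        for agent in group.get("agents", []):
            missing = REQUIRED_AGENT_KEYS - agent.keys()
            if missing:
                errors.append(f"agent '{agent.get('name', '?')}' in '{group_id}' is missing {sorted(missing)}")
    return errors

if __name__ == "__main__":
    problems = validate()
    for p in problems:
        print("ERROR:", p)
    sys.exit(1 if problems else 0)
```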
diff --git a/app/autogen_agents.py b/app/autogen_agents.py
new file mode 100644
index 0000000..5142c00
--- /dev/null
+++ b/app/autogen_agents.py
@@ -0,0 +1,69 @@
+from autogen import ChatResult, GroupChat, Agent, OpenAIWrapper, ConversableAgent, UserProxyAgent, GroupChatManager
+from data_model import ModelInformation
+import os
+# json is used to load the agent group definitions
+import json
+
+config_list = [{
+    "model": os.getenv("LLM_MODEL", "qwen-plus"),
+    "base_url": os.getenv("BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
+    "api_key": os.getenv("API_KEY", "EMPTY"),
+    "price": [0.004, 0.012]
+}]
+
+llm_config = {"config_list": config_list, "cache_seed": 42, "timeout": 180}
+
+# Read the JSON file and initialise agent_configs
+def load_agent(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        agent_configs = json.load(file)
+    for agent_group_name, config in agent_configs.items():
+        for agent in config['agents']:
+            agent['llm_config'] = llm_config
+    return agent_configs
+
+# Path inside the Docker container (docker-compose.yml mounts the repo at /app)
+agent_configs = load_agent('/app/app/agent_configs.json')
+
+models = {}
+
+for k, v in agent_configs.items():
+    models[k] = ModelInformation(
+        id=k,
+        name=v["name"],
+        description=v["description"],
+        pricing={
+            "prompt": "0.00",
+            "completion": "0.00",
+            "image": "0.00",
+            "request": "0.00",
+        },
+        context_length=1024 * 1000,
+        architecture={
+            "modality": "text",
+            "tokenizer": "text2vec-openai",
+            "instruct_type": "InstructGPT",
+        },
+        top_provider={"max_completion_tokens": None, "is_moderated": False},
+        per_request_limits=None
+    )
+
+def build_agents(agent_id):
+    """The first agent in the returned list must be the user proxy agent."""
+    agents = []
+    agents.append(ConversableAgent(
+        name="user",
+        system_message="提出写作要求的用户。",
+        is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
+        code_execution_config=False,
+        human_input_mode="NEVER"
+    ))
+    for config in agent_configs[agent_id]["agents"]:
+        _agent = ConversableAgent(
+            name=config["name"],
+            system_message=config["system_message"],
+            human_input_mode=config["human_input_mode"],
+            code_execution_config=config["code_execution_config"],
+            llm_config=config["llm_config"],
+        )
+        agents.append(_agent)
+    return agents
\ No newline at end of file
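The module above wires everything together from environment variables and the JSON file at the container path `/app/app/agent_configs.json`. A quick interactive check of what it produces could look like the sketch below; it assumes it is run from the repo root inside the container (for example via `docker compose exec api python`), and the printed values are only what the `article_writer` example would yield.

```python
# Quick inspection of the loaded agent groups (sketch, run from the repo root inside the container).
import sys
sys.path.insert(0, "app")  # mirror run.sh, which puts app/ on PYTHONPATH

from autogen_agents import agent_configs, models, build_agents

# Model ids exposed by GET /models are simply the top-level keys of agent_configs.json.
print(list(models.keys()))        # e.g. ['article_writer']

# build_agents() returns the user proxy first, followed by the configured agents.
agents = build_agents("article_writer")
print([a.name for a in agents])   # e.g. ['user', 'writer', 'editor']
```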
diff --git a/app/autogen_server.py b/app/autogen_server.py
index edd5290..434dcde 100644
--- a/app/autogen_server.py
+++ b/app/autogen_server.py
@@ -20,7 +20,7 @@ def serve_autogen(inp: Input):
     model_dump = inp.model_dump()
     model_messages = model_dump["messages"]
-    workflow = AutogenWorkflow()
+    workflow = AutogenWorkflow(model_dump["model"])
 
     if inp.stream:
         queue = Queue()
@@ -59,7 +59,7 @@ def return_streaming_response(inp: Input, queue: Queue):
             usage=empty_usage,
             model=inp.model,
         )
-        yield f"data: {json.dumps(chunk.model_dump())}\n\n"
+        yield f"data: {json.dumps(chunk.model_dump(), ensure_ascii=False)}\n\n"
         queue.task_done()
diff --git a/app/autogen_workflow.py b/app/autogen_workflow.py
index 4aeb33d..2f15e5c 100644
--- a/app/autogen_workflow.py
+++ b/app/autogen_workflow.py
@@ -10,6 +10,7 @@
 from autogen.code_utils import content_str
 from autogen.io import IOStream
 from termcolor import colored
 
+from autogen_agents import build_agents, llm_config
 
 def streamed_print_received_message(
@@ -121,53 +122,16 @@
     )
 
 
-llm_config = {"model": "gpt-4", "api_key": os.environ["OPENAI_API_KEY"]}
-
-
 class AutogenWorkflow:
-    def __init__(self):
+    def __init__(self, agent_id: str):
         self.queue: Queue | None = None
-        self.user_proxy = UserProxyAgent(
-            name="UserProxy",
-            system_message="You are the UserProxy. You are the user in this conversation.",
-            human_input_mode="NEVER",
-            code_execution_config=False,
-            llm_config=llm_config,
-            description="The UserProxy is the user in this conversation. They will be interacting with the other agents in the group chat.",
-        )
-        self.mitch_hedberg = ConversableAgent(
-            name="MitchHedberg",
-            system_message="You are the comedian Mitch Hedberg. You are known for your surreal humor and deadpan delivery. Your comedy typically featured short, sometimes one-line jokes mixed with absurd elements and non sequitur",
-            max_consecutive_auto_reply=3,
-            human_input_mode="NEVER",
-            code_execution_config=False,
-            llm_config=llm_config,
-            default_auto_reply="I used to do drugs. I still do, but I used to, too.",
-            description="Mitch Hedberg was an American stand-up comedian known for his surreal humor and deadpan "
-                        "delivery. His comedy typically featured short, sometimes one-line jokes mixed with absurd "
-                        "elements and non sequiturs. Hedberg's comedy and onstage persona gained him a cult "
-                        "following, with audience members sometimes shouting out the punchlines to his jokes before "
-                        "he could finish them.",
-        )
-        self.greg_giraldo = ConversableAgent(
-            name="GregGiraldo",
-            system_message="You are the comedian Greg Giraldo. You are known for your acerbic style of humor and your appearances on Comedy Central's roasts. You are a former lawyer who turned to comedy full-time.",
-            max_consecutive_auto_reply=3,
-            human_input_mode="NEVER",
-            code_execution_config=False,
-            llm_config=llm_config,
-            default_auto_reply="I'm not a good person, but I would like to be better.",
-            description="Greg Giraldo was an American stand-up comedian, television personality, and lawyer. He is known for his acerbic style of humor and his appearances on Comedy Central's roasts. Giraldo was a former lawyer who turned to comedy full-time, and he was known for his sharp wit and biting commentary on a wide range of topics."
-        )
+
+        self.agents = build_agents(agent_id)
         self.group_chat_with_introductions = GroupChat(
-            agents=[
-                self.user_proxy,
-                self.mitch_hedberg,
-                self.greg_giraldo,
-            ],
+            agents=self.agents,
             messages=[],
-            max_round=10,
+            max_round=50,
             send_introductions=True,
         )
         self.group_chat_manager_with_intros = GroupChatManager(
@@ -209,8 +173,8 @@ def streamed_print_received_message_with_queue_and_index(
             streamed_print_received_message_with_queue_and_index,
             self.group_chat_manager_with_intros,
         )
-
-        chat_history = self.user_proxy.initiate_chat(
+        # agents[0] is the user_proxy agent
+        chat_history = self.agents[0].initiate_chat(
             self.group_chat_manager_with_intros,
             message=message,
         )
         if stream:
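The two files above implement the streaming path: the workflow's patched message printer pushes every received group-chat message onto a `Queue`, and `return_streaming_response` drains that queue as server-sent events (now with `ensure_ascii=False` so Chinese output is not escaped). Stripped of the Autogen details, the pattern looks roughly like the self-contained sketch below; the function names and the sentinel value are illustrative, not the repo's actual ones.

```python
# Simplified illustration of the queue-based streaming used by serve_autogen (not the repo's code).
import json
import threading
from queue import Queue

SENTINEL = object()  # placeholder end-of-stream marker; the real code uses its own signal

def run_workflow(queue: Queue) -> None:
    # Stands in for AutogenWorkflow.run(): every message the group chat produces
    # is pushed onto the queue as soon as it is received.
    for text in ["第一段", "第二段", "TERMINATE"]:
        queue.put({"role": "assistant", "content": text})
    queue.put(SENTINEL)

def sse_events(queue: Queue):
    # Stands in for return_streaming_response(): drain the queue and emit SSE lines.
    while True:
        item = queue.get()
        if item is SENTINEL:
            break
        # ensure_ascii=False keeps non-ASCII text (e.g. Chinese) readable in the stream
        yield f"data: {json.dumps(item, ensure_ascii=False)}\n\n"
        queue.task_done()

work_queue: Queue = Queue()
threading.Thread(target=run_workflow, args=(work_queue,), daemon=True).start()
for event in sse_events(work_queue):
    print(event, end="")
```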
diff --git a/app/main.py b/app/main.py
index ca97c0a..6d11676 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,13 +1,32 @@
 from dotenv import load_dotenv
-from fastapi import FastAPI
-from fastapi import HTTPException
+import os
+from fastapi import FastAPI, Depends, HTTPException, Request, Response
 from starlette.responses import RedirectResponse
 
 from autogen_server import serve_autogen
 from data_model import Input, ModelInformation
+from autogen_agents import models
 
 load_dotenv()
 
+# Read the comma-separated list of accepted API keys from the environment
+AUTH_KEYS = os.getenv("AUTH_KEYS").split(",")
+
+# Verify that the request carries one of the accepted keys in its Authorization header
+def authorization(req: Request):
+    if not req.headers.get("Authorization"):
+        raise HTTPException(
+            status_code=401,
+            detail="Unauthorized"
+        )
+    token = req.headers["Authorization"].replace("Bearer ", "")
+    if token not in AUTH_KEYS:
+        raise HTTPException(
+            status_code=401,
+            detail="Unauthorized"
+        )
+    return True
+
 base = "/autogen/"
 prefix = base + "api/v1"
 openapi_url = prefix + "/openapi.json"
@@ -20,27 +39,6 @@
     redoc_url=None,
 )
 
-model_info = ModelInformation(
-    id="model_id_v0.1",
-    name="model_name_v0.1",
-    description="This is a state-of-the-art model.",
-    pricing={
-        "prompt": "0.00",
-        "completion": "0.00",
-        "image": "0.00",
-        "request": "0.00",
-    },
-    context_length=1024 * 1000,
-    architecture={
-        "modality": "text",
-        "tokenizer": "text2vec-openai",
-        "instruct_type": "InstructGPT",
-    },
-    top_provider={"max_completion_tokens": None, "is_moderated": False},
-    per_request_limits=None,
-)
-
-
 @app.get(path=base, include_in_schema=False)
 async def docs_redirect():
     return RedirectResponse(url=docs_url)
@@ -49,17 +47,19 @@ async def docs_redirect():
 @app.get(prefix + "/models")
 async def get_models():
     return {
-        "data": {"data": model_info.dict()}
+        "object": "list",
+        "data": [model.dict(exclude={"agent_configs"}) for model in models.values()]
     }
 
 
 @app.post(prefix + "/chat/completions")
-async def route_query(model_input: Input):
-    model_services = {
-        model_info.name: serve_autogen,
-    }
+async def route_query(model_input: Input, authorized: bool = Depends(authorization)):
+    # model_services = {
+    #     model_info.name: serve_autogen,
+    # }
 
-    service = model_services.get(model_input.model)
+    # service = model_services.get(model_input.model)
+    service = models.get(model_input.model)
     if not service:
         raise HTTPException(status_code=404, detail="Model not found")
-    return service(model_input)
+    return serve_autogen(model_input)
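The `authorization` dependency is a plain bearer-token check against the comma-separated `AUTH_KEYS` list. In isolation the pattern behaves like the self-contained sketch below; it deliberately mirrors the dependency rather than importing `app.main` (which also needs the agent config and LLM environment), and it assumes `httpx` is installed for FastAPI's `TestClient`.

```python
# Self-contained illustration of the bearer-key check used by /chat/completions (not the repo's code).
import os
from fastapi import Depends, FastAPI, HTTPException, Request
from fastapi.testclient import TestClient

os.environ.setdefault("AUTH_KEYS", "key-one,key-two")  # placeholder keys for the demo
AUTH_KEYS = os.getenv("AUTH_KEYS").split(",")

def authorization(req: Request):
    # Reject requests without a header or with a token not in AUTH_KEYS.
    if not req.headers.get("Authorization"):
        raise HTTPException(status_code=401, detail="Unauthorized")
    token = req.headers["Authorization"].replace("Bearer ", "")
    if token not in AUTH_KEYS:
        raise HTTPException(status_code=401, detail="Unauthorized")
    return True

app = FastAPI()

@app.post("/chat/completions")
async def chat(authorized: bool = Depends(authorization)):
    return {"ok": True}

client = TestClient(app)
assert client.post("/chat/completions").status_code == 401                                            # no header
assert client.post("/chat/completions", headers={"Authorization": "Bearer nope"}).status_code == 401  # wrong key
assert client.post("/chat/completions", headers={"Authorization": "Bearer key-one"}).status_code == 200
```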
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..0a6e1bf
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+services:
+  api:
+    build: .
+    volumes:
+      - ./:/app
+      - ~/.cache:/root/.cache
+    environment:
+      - AUTH_KEYS=YOUR_HTTP_AUTHORIZATION_KEYS_SEPARATED_BY_COMMA
+      - LLM_MODEL=qwen-plus
+      - BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+      - API_KEY=YOUR_API_KEY
+    ports:
+      - "8000:8000"
\ No newline at end of file
diff --git a/run.sh b/run.sh
index 9256129..8e4be7a 100755
--- a/run.sh
+++ b/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 set -a
 export PYTHONPATH=$PYTHONPATH:$(pwd)/app
-source .env
-uvicorn --reload --log-level debug app.main:app --host 0.0.0.0
+
+uvicorn --reload --log-level debug app.main:app --host 0.0.0.0 --port 8000
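After `docker compose up`, a quick way to confirm the container is serving and that the agent groups from `app/agent_configs.json` are exposed is to hit the model list endpoint, which does not require the Authorization header. A minimal smoke check, assuming `requests` is available on the host:

```python
# Post-deploy smoke check (sketch): list the models the running server exposes.
import requests

resp = requests.get("http://localhost:8000/autogen/api/v1/models")
resp.raise_for_status()
body = resp.json()
print([m["id"] for m in body["data"]])   # expected to include 'article_writer'
```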