diff --git a/assistants/document-assistant/assistant/chat.py b/assistants/document-assistant/assistant/chat.py
index 7fc4ce77..87ee95be 100644
--- a/assistants/document-assistant/assistant/chat.py
+++ b/assistants/document-assistant/assistant/chat.py
@@ -221,7 +221,9 @@ async def on_message_created(
await archive_task_queues.enqueue_run(
context=context,
attachments=attachments,
- archive_task_config=ArchiveTaskConfig(chunk_token_count_threshold=30_000),
+ archive_task_config=ArchiveTaskConfig(
+ chunk_token_count_threshold=config.orchestration.prompts.token_window
+ ),
archive_summarizer=construct_archive_summarizer(
service_config=config.generative_ai_fast_client_config.service_config,
request_config=config.generative_ai_fast_client_config.request_config,
diff --git a/assistants/document-assistant/assistant/context_management/file_manager.py b/assistants/document-assistant/assistant/context_management/file_manager.py
deleted file mode 100644
index f47819fa..00000000
--- a/assistants/document-assistant/assistant/context_management/file_manager.py
+++ /dev/null
@@ -1,511 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-# TODO: This will be replaced and currnently does not work anymore.
-
-import asyncio
-import io
-import json
-import logging
-import random
-import time
-
-import pendulum
-from assistant_drive import Drive, DriveConfig, IfDriveFileExistsBehavior
-from liquid import render
-from openai.types.chat import (
- ChatCompletionSystemMessageParam,
- ChatCompletionUserMessageParam,
-)
-from openai_client import create_client
-from semantic_workbench_assistant.assistant_app import (
- ConversationContext,
- storage_directory_for_context,
-)
-
-from assistant.config import AssistantConfigModel
-from assistant.context_management.context_manager import complete_context_management
-from assistant.context_management.conv_compaction import get_compaction_data
-from assistant.context_management.inspector import ContextManagementInspector
-from assistant.filesystem import AttachmentsExtension
-from assistant.filesystem._tasks import get_filesystem_metadata
-from assistant.response.utils.openai_utils import convert_oai_messages_to_xml, get_completion
-from assistant.response.utils.tokens_tiktoken import TokenizerOpenAI
-from assistant.types import FileManagerData, FileRelevance
-
-logger = logging.getLogger(__name__)
-
-USE_FAST_SCORING = True
-
-FILE_SCORE_SYSTEM_PROMPT = """You are determining the probability of a file being relevant to the conversation and topics being discussed.
-You will be provided with a conversation history and the content of the file to analyze. \
-Note that sometimes the file content will actually be a chunk of a conversation. \
-Additionally, the conversation history may contain other files, these are NOT the files you are scoring. \
-Do not let these aspects confuse you. Use the XML tags to determine what is what.
-From the conversation history, you should extrapolate what the the conversation is about to determine if a file is **core** \
-to understanding the context of what is happening or if it is specific to one topic or question at a point in time.
-Current date: {{current_datetime}}
-
-1. Conduct brief reasoning
-Ask yourself questions such as the following to determine the probability of the file being overall relevant to the next task the user might want:
-- What is the likelihood that the file will be needed to complete the next task that the user will ask?
-- Is the file something that has been added or used recently? For example, documents that the user has recently edited or added should be scored very highly.
-- Is it a file that provides important global context? For example, guidelines, a framing document, goals, checklists, etc. If so that file should be scored the highest for relevance.
- - While these files might not be explictly used or referenced, but they contribute to the overall understanding and thus should be score higher for recency as well.
-- Does the file have meaningful content or does it seem like more random notes or a draft? If so, that file is probably not as relevant.
-
-2. Determine Recency Probability
-- Based on the conversation, the file content, and your reasoning provide a score that indicates how recently the file has been leveraged.
-- You should subtract 0.1 for every turn (where a turn starts with a user message and ends with the final assistant message) since the file was last used.
-
-3. Determine Relevance Probability
-- Similarly, based on the conversation, the file content, and your reasoning provide a score that indicates how \
-relevant the file is to the next task the user might want to do and/or their overall goals
-- Don't let the recency probability influence this score, rather focus on the content of the file and the conversation history."""
-
-FILE_SCORE_USER_PROMPT = """{{conversation_history}}
-
-
-{{file_content}}
-
-
-Now briefly reason and determine the recency and relevance probabilities."""
-
-
-FILE_SCORE_SCHEMA = {
- "name": "score_file",
- "schema": {
- "type": "object",
- "properties": {
- "relevance_probability": {
- "type": "object",
- "properties": {
- "brief_reasoning": {
- "type": "string",
- "description": "Your reasoning about the probability of the file's (near) future relevance to the user's next task and goals. Keep it to 100 words or less",
- },
- "recency_probability": {
- "type": "number",
- "description": "Probability (from 0 to 1) that the file is relevant to the user's next task based on how recently it has been used.",
- },
- "relevance_probability": {
- "type": "number",
- "description": "Probability (from 0 to 1) that the file is relevant to the user's next task based on its content and the conversation context.",
- },
- },
- "required": [
- "brief_reasoning",
- "recency_probability",
- "relevance_probability",
- ],
- "additionalProperties": False,
- "description": "Reasoning and probabilities for the file's relevance to the user's next task.",
- }
- },
- "required": ["relevance_probability"],
- "additionalProperties": False,
- },
- "strict": True,
-}
-
-MULTI_FILE_SCORE_SYSTEM_PROMPT = """You are determining the probability of files being relevant to the conversation and topics being discussed.
-You will be provided with a conversation history and the summarized contents of files for analyze. \
-Note that sometimes the file content will actually be a chunk of a conversation. \
-Additionally, the conversation history may contain other files, these are NOT the files you are scoring. \
-Do not let these aspects confuse you. Use the XML tags to determine what is what.
-From the conversation history, you should extrapolate what the the conversation is about to determine if a file is **core** \
-to understanding the context of what is happening or if it is specific to one topic or question at a point in time.
-Current date: {{current_datetime}}
-
-FOR EACH FILE:
-1. Write down the file name/path
-
-2. Conduct brief reasoning
-Ask yourself questions such as the following to determine the probability of the file being overall relevant to the next task the user might want:
-- What is the likelihood that the file will be needed to complete the next task that the user will ask?
-- Is the file something that has been added or used recently? For example, documents that the user has recently edited or added should be scored very highly.
-- Is it a file that provides important global context? For example, guidelines, a framing document, goals, checklists, etc. If so that file should be scored the highest for relevance.
- - While these files might not be explictly used or referenced, but they contribute to the overall understanding and thus should be score higher for recency as well.
-- Does the file have meaningful content or does it seem like more random notes or a draft? If so, that file is probably not as relevant.
-
-3. Determine Recency Probability
-- Based on the conversation, the file content, and your reasoning provide a score that indicates how recently the file has been leveraged.
-- You should subtract 0.1 for every turn (where a turn starts with a user message and ends with the final assistant message) since the file was last used.
-
-4. Determine Relevance Probability
-- Similarly, based on the conversation, the file content, and your reasoning provide a score that indicates how \
-relevant the file is to the next task the user might want to do and/or their overall goals
-- Don't let the recency probability influence this score, rather focus on the content of the file and the conversation history."""
-
-MULTI_FILE_SCORE_USER_PROMPT = """{{conversation_history}}
-
-
-{{files_content}}
-
-
-Now briefly reason and determine the recency and relevance probabilities for each file."""
-
-MULTI_FILE_SCORE_SCHEMA = {
- "name": "score_files",
- "schema": {
- "type": "object",
- "properties": {
- "file_scores": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "file_name": {
- "type": "string",
- "description": "The name/path of the file being scored",
- },
- "brief_reasoning": {
- "type": "string",
- "description": "Your reasoning about the probability of the file's (near) future relevance to the user's next task and goals. Keep it to 100 words or less",
- },
- "recency_probability": {
- "type": "number",
- "description": "Probability (from 0 to 1) that the file is relevant to the user's next task based on how recently it has been used.",
- },
- "relevance_probability": {
- "type": "number",
- "description": "Probability (from 0 to 1) that the file is relevant to the user's next task based on its content and the conversation context.",
- },
- },
- "required": [
- "file_name",
- "brief_reasoning",
- "recency_probability",
- "relevance_probability",
- ],
- "additionalProperties": False,
- },
- }
- },
- "required": ["file_scores"],
- "additionalProperties": False,
- },
- "strict": True,
-}
-
-
-_file_manager_locks: dict[str, asyncio.Lock] = {}
-tokenizer = TokenizerOpenAI(model="gpt-4o")
-
-
-def _get_file_manager_lock_for_context(context: ConversationContext) -> asyncio.Lock:
- """Get or create a conversation-specific lock for file manager operations."""
- if context.id not in _file_manager_locks:
- _file_manager_locks[context.id] = asyncio.Lock()
- return _file_manager_locks[context.id]
-
-
-def _file_manager_drive_for_context(context: ConversationContext) -> Drive:
- drive_root = storage_directory_for_context(context) / "file_manager"
- return Drive(DriveConfig(root=drive_root))
-
-
-async def get_file_rankings(context: ConversationContext) -> FileManagerData:
- drive = _file_manager_drive_for_context(context)
-
- if not drive.file_exists("file_rankings.json"):
- return FileManagerData()
-
- try:
- with drive.open_file("file_rankings.json") as f:
- data = json.load(f)
- file_manager_data = FileManagerData.model_validate(data)
- return file_manager_data
- except Exception:
- return FileManagerData()
-
-
-async def save_file_rankings(context: ConversationContext, file_manager_data: FileManagerData) -> None:
- drive = _file_manager_drive_for_context(context)
- data_json = json.dumps(file_manager_data.model_dump(), indent=2).encode("utf-8")
- drive.write(
- content=io.BytesIO(data_json),
- filename="file_rankings.json",
- if_exists=IfDriveFileExistsBehavior.OVERWRITE,
- content_type="application/json",
- )
-
-
-async def _slow_score_files(
- context: ConversationContext,
- config: AssistantConfigModel,
- attachments_extension: AttachmentsExtension,
- context_management_inspector: ContextManagementInspector,
-):
- """
- Score files based on an LLM prompt that looks at the conversation history and the file
- """
- logger.debug(f"Acquiring file manager lock for conversation {context.id}")
- async with asyncio.timeout(500):
- async with _get_file_manager_lock_for_context(context):
- logger.debug(f"File manager lock acquired for conversation {context.id}")
- # Get existing file rankings to avoid recomputing
- existing_file_manager_data = await get_file_rankings(context)
-
- # Get all the files: attachments, files, and conversation chunks
- attachment_filenames = await attachments_extension.get_attachment_filenames(context)
- doc_editor_filenames = await attachments_extension._inspectors.list_document_filenames(context)
- compaction_data = await get_compaction_data(context)
-
- # For each file, construct the prompt
- all_files = set(attachment_filenames + doc_editor_filenames)
- for chunk in compaction_data.compaction_data.values():
- all_files.add(chunk.chunk_name)
-
- # If there are less than max_relevant files, we can skip scoring
- max_relevant_files = config.orchestration.prompts.max_relevant_files
- if len(all_files) <= max_relevant_files:
- return
-
- percent_files_score_per_turn = config.orchestration.prompts.percent_files_score_per_turn
- minimum_files = 0
- max_files = 100
-
- # Separate files into those with and without scores
- files_without_scores = [path for path in all_files if path not in existing_file_manager_data.file_data]
- files_with_scores = [path for path in all_files if path in existing_file_manager_data.file_data]
-
- # Calculate target number of files to score based on percentage
- total_files_to_score = max(
- minimum_files, min(max_files, int(len(all_files) * percent_files_score_per_turn))
- )
-
- # Start with ALL unscored files up to max_files
- files_to_score = files_without_scores[:max_files]
-
- # If we haven't reached our target and have remaining budget, sample from scored files
- remaining_slots = total_files_to_score - len(files_to_score)
- if remaining_slots > 0 and files_with_scores:
- additional_files = random.sample(files_with_scores, min(remaining_slots, len(files_with_scores)))
- files_to_score.extend(additional_files)
-
- files_to_score = files_to_score[:max_files]
- if not files_to_score:
- return
-
- # Get the conversation history only if we have files to score
- conversation_history = await complete_context_management(
- context, config, attachments_extension, context_management_inspector, []
- )
- conv_history = await convert_oai_messages_to_xml(
- conversation_history,
- filename=None,
- )
-
- for path in files_to_score:
- # First try to find the path as an editable file
- file_content = await attachments_extension._inspectors.get_file_content(context, path)
- # Then try to find the path as an attachment file
- if file_content is None:
- file_content = await attachments_extension.get_attachment(context, path)
- # Finally try to find the path as a conversation file.
- if file_content is None:
- compaction_data = await get_compaction_data(context)
- for chunk in compaction_data.compaction_data.values():
- if chunk.chunk_name == path:
- file_content = chunk.original_conversation_text
- break
- if file_content is None:
- continue
- file_content = tokenizer.truncate_str(file_content, max_len=config.orchestration.prompts.token_window)
-
- system_prompt = render(FILE_SCORE_SYSTEM_PROMPT, current_datetime=pendulum.now().format("YYYY-MM-DD"))
- user_prompt = render(
- FILE_SCORE_USER_PROMPT,
- conversation_history=conv_history,
- file_path=path,
- file_content=file_content,
- )
- file_score_messages = [
- ChatCompletionSystemMessageParam(role="system", content=system_prompt),
- ChatCompletionUserMessageParam(role="user", content=user_prompt),
- ]
- async with create_client(config.generative_ai_fast_client_config.service_config) as client:
- response = await get_completion(
- client=client,
- request_config=config.generative_ai_fast_client_config.request_config,
- chat_message_params=file_score_messages,
- tools=None,
- structured_output=FILE_SCORE_SCHEMA,
- )
- try:
- content = response.choices[0].message.content or "{}"
- json_message = json.loads(content)
- except json.JSONDecodeError:
- json_message = {}
-
- reasoning = ""
- recency_probability = 0.0
- relevance_probability = 0.0
- if "relevance_probability" in json_message:
- relevance_data = json_message.get("relevance_probability", {})
- reasoning = relevance_data.get("brief_reasoning", "")
- recency_probability = relevance_data.get("recency_probability", 0.0)
- relevance_probability = relevance_data.get("relevance_probability", 0.0)
- file_relevance = FileRelevance(
- brief_reasoning=reasoning,
- recency_probability=recency_probability,
- relevance_probability=relevance_probability,
- )
-
- existing_file_manager_data.file_data[path] = file_relevance
- await save_file_rankings(context, existing_file_manager_data)
- logger.debug(f"File manager lock released for conversation {context.id}")
-
-
-async def _fast_score_files(
- context: ConversationContext,
- config: AssistantConfigModel,
- attachments_extension: AttachmentsExtension,
- context_management_inspector: ContextManagementInspector,
-) -> None:
- """
- Score multiple files at once using summaries instead of full content for better performance
- """
- logger.debug(f"Acquiring file manager lock for conversation {context.id}")
- async with asyncio.timeout(500):
- async with _get_file_manager_lock_for_context(context):
- logger.debug(f"File manager lock acquired for conversation {context.id}")
- # Get existing file rankings to avoid recomputing
- existing_file_manager_data = await get_file_rankings(context)
-
- # Get all the files: attachments, files, and conversation chunks
- attachment_filenames = await attachments_extension.get_attachment_filenames(context)
- doc_editor_filenames = await attachments_extension._inspectors.list_document_filenames(context)
- compaction_data = await get_compaction_data(context)
-
- # For each file, construct the prompt
- all_files = set(attachment_filenames + doc_editor_filenames)
- for chunk in compaction_data.compaction_data.values():
- all_files.add(chunk.chunk_name)
-
- # If there are less than max_relevant files, we can skip scoring
- max_relevant_files = config.orchestration.prompts.max_relevant_files
- if len(all_files) <= max_relevant_files:
- return
-
- percent_files_score_per_turn = config.orchestration.prompts.percent_files_score_per_turn
- minimum_files = 0
- max_files = 40
-
- # Separate files into those with and without scores
- files_without_scores = [path for path in all_files if path not in existing_file_manager_data.file_data]
- files_with_scores = [path for path in all_files if path in existing_file_manager_data.file_data]
-
- # If there are unscored files, score up to max_files of them
- if files_without_scores:
- files_to_score = files_without_scores[:max_files]
- else:
- # If all files are scored, use percentage-based scoring for re-scoring
- total_files_to_score = max(
- minimum_files, min(max_files, int(len(all_files) * percent_files_score_per_turn))
- )
- files_to_score = random.sample(files_with_scores, min(total_files_to_score, len(files_with_scores)))
- files_to_score = files_to_score[:max_files]
- if not files_to_score:
- return
-
- # Get filesystem metadata for summaries
- filesystem_metadata = await get_filesystem_metadata(context)
-
- # Build files content string with summaries
- files_content_parts = []
- for path in files_to_score:
- summary = "No summary available yet."
- # Get file summary from filesystem metadata
- file_metadata = filesystem_metadata.get(path)
- if file_metadata and file_metadata.summary:
- summary = file_metadata.summary
- else:
- # Fallback: use compacted summary for conversation chunks
- for chunk in compaction_data.compaction_data.values():
- if chunk.chunk_name == path:
- summary = chunk.compacted_text
- break
-
- files_content_parts.append(f'\n{summary}\n')
-
- files_content = "\n\n".join(files_content_parts)
-
- # Get the conversation history only if we have files to score
- conversation_history = await complete_context_management(
- context, config, attachments_extension, context_management_inspector, []
- )
- conv_history = await convert_oai_messages_to_xml(
- conversation_history,
- filename=None,
- )
-
- # Limit the size of the files_content to ensure we stay within token limits
- files_content = tokenizer.truncate_str(
- files_content,
- max_len=config.generative_ai_fast_client_config.request_config.max_tokens
- - config.orchestration.prompts.max_total_tokens
- - 2000,
- )
-
- # Create the prompts
- system_prompt = render(MULTI_FILE_SCORE_SYSTEM_PROMPT, current_datetime=pendulum.now().format("YYYY-MM-DD"))
- user_prompt = render(
- MULTI_FILE_SCORE_USER_PROMPT,
- conversation_history=conv_history,
- files_content=files_content,
- )
-
- multi_file_score_messages = [
- ChatCompletionSystemMessageParam(role="system", content=system_prompt),
- ChatCompletionUserMessageParam(role="user", content=user_prompt),
- ]
-
- # Score all files at once
- async with create_client(config.generative_ai_fast_client_config.service_config) as client:
- response = await get_completion(
- client=client,
- request_config=config.generative_ai_fast_client_config.request_config,
- chat_message_params=multi_file_score_messages,
- tools=None,
- structured_output=MULTI_FILE_SCORE_SCHEMA,
- )
- try:
- content = response.choices[0].message.content or "{}"
- json_message = json.loads(content)
- except json.JSONDecodeError:
- json_message = {}
-
- file_scores = json_message.get("file_scores", [])
- for file_score in file_scores:
- file_name = file_score.get("file_name", "")
- if file_name in files_to_score:
- reasoning = file_score.get("brief_reasoning", "")
- recency_probability = file_score.get("recency_probability", 0.0)
- relevance_probability = file_score.get("relevance_probability", 0.0)
-
- file_relevance = FileRelevance(
- brief_reasoning=reasoning,
- recency_probability=recency_probability,
- relevance_probability=relevance_probability,
- )
-
- existing_file_manager_data.file_data[file_name] = file_relevance
-
- await save_file_rankings(context, existing_file_manager_data)
-
- # TODO: REMOVE ME
- time.sleep(5)
- logger.debug(f"File manager lock released for conversation {context.id}")
-
-
-async def task_score_files(
- context: ConversationContext,
- config: AssistantConfigModel,
- attachments_extension: AttachmentsExtension,
- context_management_inspector: ContextManagementInspector,
-) -> None:
- if USE_FAST_SCORING:
- await _fast_score_files(context, config, attachments_extension, context_management_inspector)
- else:
- await _slow_score_files(context, config, attachments_extension, context_management_inspector)
diff --git a/assistants/document-assistant/assistant/filesystem/_prompts.py b/assistants/document-assistant/assistant/filesystem/_prompts.py
index a8498d97..2dc4ab13 100644
--- a/assistants/document-assistant/assistant/filesystem/_prompts.py
+++ b/assistants/document-assistant/assistant/filesystem/_prompts.py
@@ -20,19 +20,25 @@
Files that are read-only are known as "attachments" and are initially appended to user's message at the time they uploaded them. \
Eventually they might fall out of your context window and you will need to use the `view` tool to read them again if you need it. \
A summary of the file content has been provided to you to better understand what the file is about.
+There are more files that you can access. First call the `ls` tool to list all files available in the filesystem.
### Recent & Relevant Files
You can read the following files in again using the `view` tool if they are needed. \
-If they are editable you can also use the `edit_file` tool to edit them."""
+If they are editable you can also use the `edit_file` tool to edit them.
+Paths are mounted at different locations depending on the type of file and you must always use the absolute path to the file, starting with `/` for any path.
+- Editable files are mounted at `/editable_documents/editable_file.md`.
+- User uploaded files or "attachments" are mounted at `/attachments/attachment.pdf`."""
-FILESYSTEM_ADDON_PROMPT = """### Filesystem
-**Very important:** This current interaction with the user is long-running and due to context window limitations, the above section can only show a limited number of files. \
-There are more files that you can access. First call the `ls` tool to list all files available in the filesystem. \
-Then, you can use the `view` tool (use it multiple times if needed) to read any of the files that you find relevant to the user's request.\
-This is a similar concept to how you would explore a codebase in a code editor."""
+ARCHIVES_ADDON_PROMPT = """### Conversation Memories and Archives
+You have a limited context window, which means that some of the earlier parts of the conversation may fall out of your context.
+To help you with that, below you will find summaries of older parts of the conversation that have been "archived". \
+You should use these summaries as "memories" to help you understand the historical context and preferences of the user. \
+Note that some of these archived conversations may still be visible to you in the conversation history.
+If the current user's task requires you to access the full content of the conversation, you can use the `view` tool to read the archived conversations. \
+Historical conversations are mounted at `/archives/conversation_1234567890.json`"""
VIEW_TOOL = {
"type": "function",
@@ -45,7 +51,7 @@
"properties": {
"path": {
"type": "string",
- "description": "The relative path to the file.",
+ "description": "The absolute path to the file. Must start with `/` followed by the mount point, e.g. `/editable_documents/filename.md`.",
},
},
"required": ["path"],
@@ -90,8 +96,9 @@
EDIT_TOOL_DESCRIPTION_HOSTED = """Edits the Markdown file at the provided path, focused on the given task.
The user has Markdown editor available that is side by side with this chat.
Remember that the editable files are the ones that have the `-rw-` permission bits. \
+They also must be mounted at `/editable_documents/` and have a `.md` extension. \
If you provide a new file path, it will be created for you and then the editor will start to edit it (from scratch). \
-Name the file with capital letters and spacing like "Weekly AI Report.md" or "Email to Boss.md" since it will be directly shown to the user in that way.
+Name the file with capital letters and spacing like "/editable_documents/Weekly AI Report.md" or "/editable_documents/Email to Boss.md" since it will be directly shown to the user in that way.
Provide a task that you want it to do in the document. For example, if you want to have it expand on one section, \
you can say "expand on the section about ". The task should be at most a few sentences. \
Do not provide it any additional context outside of the task parameter. It will automatically be fetched as needed by this tool.
diff --git a/assistants/document-assistant/assistant/response/completion_handler.py b/assistants/document-assistant/assistant/response/completion_handler.py
index 45da0833..de9f33ed 100644
--- a/assistants/document-assistant/assistant/response/completion_handler.py
+++ b/assistants/document-assistant/assistant/response/completion_handler.py
@@ -10,7 +10,6 @@
from assistant_extensions.mcp import (
ExtendedCallToolRequestParams,
MCPSession,
- OpenAISamplingHandler,
handle_mcp_tool_call,
)
from chat_context_toolkit.virtual_filesystem import VirtualFileSystem
@@ -29,7 +28,6 @@
ConversationContext,
)
-from assistant.filesystem import AttachmentsExtension
from assistant.guidance.dynamic_ui_inspector import update_dynamic_ui_state
from assistant.guidance.guidance_prompts import DYNAMIC_UI_TOOL_NAME, DYNAMIC_UI_TOOL_RESULT
diff --git a/assistants/document-assistant/assistant/response/responder.py b/assistants/document-assistant/assistant/response/responder.py
index b96d3021..3e87ed42 100644
--- a/assistants/document-assistant/assistant/response/responder.py
+++ b/assistants/document-assistant/assistant/response/responder.py
@@ -69,7 +69,7 @@
)
from assistant.filesystem._file_sources import attachments_file_source_mount, editable_documents_file_source_mount
from assistant.filesystem._filesystem import _files_drive_for_context
-from assistant.filesystem._prompts import FILES_PROMPT, LS_TOOL_OBJ
+from assistant.filesystem._prompts import ARCHIVES_ADDON_PROMPT, FILES_PROMPT, LS_TOOL_OBJ
from assistant.guidance.dynamic_ui_inspector import get_dynamic_ui_state, update_dynamic_ui_state
from assistant.guidance.guidance_prompts import DYNAMIC_UI_TOOL_NAME, DYNAMIC_UI_TOOL_OBJ
from assistant.response.completion_handler import handle_completion
@@ -452,32 +452,43 @@ async def _construct_dynamic_ui_system_prompt(self) -> str:
return system_prompt
async def _construct_filesystem_system_prompt(self) -> str:
- """Constructs the files available to the assistant that are out of context.
+ """Constructs the filesystem system prompt with available files.
+
+ Builds a system prompt that includes:
+ 1. FILES_PROMPT with attachments and editable_documents (up to 25 files)
+ 2. ARCHIVES_ADDON_PROMPT (if archives exist)
+ 3. Archives files listing (up to 25 files)
+
+ Files are sorted by timestamp (newest first), limited to 25 per category,
+ then sorted alphabetically by path.
This is an example of what gets added after the FILES_PROMPT:
-r-- path2.pdf [File content summary: ]
-rw- path3.txt [File content summary: No summary available yet, use the context available to determine the use of this file]
"""
- entries = (
- list(await self.virtual_filesystem.list_directory(path="/attachments"))
- + list(await self.virtual_filesystem.list_directory(path="/editable_documents"))
- + list(await self.virtual_filesystem.list_directory(path="/archives"))
- )
- files = [entry for entry in entries if isinstance(entry, FileEntry)]
+ # Get all file entries
+ attachments_entries = list(await self.virtual_filesystem.list_directory(path="/attachments"))
+ editable_documents_entries = list(await self.virtual_filesystem.list_directory(path="/editable_documents"))
+ archives_entries = list(await self.virtual_filesystem.list_directory(path="/archives"))
+
+ # Separate regular files from archives
+ regular_files = [entry for entry in (attachments_entries + editable_documents_entries) if isinstance(entry, FileEntry)]
+ archives_files = [entry for entry in archives_entries if isinstance(entry, FileEntry)]
# TODO: Better ranking algorithm
- # order the files by timestamp, newest first
- files.sort(key=lambda f: f.timestamp, reverse=True)
- # take the top 25 files
- files = files[:25]
+ # order the regular files by timestamp, newest first
+ regular_files.sort(key=lambda f: f.timestamp, reverse=True)
+ # take the top 25 regular files
+ regular_files = regular_files[:25]
# order them alphabetically by path
- files.sort(key=lambda f: f.path.lower())
+ regular_files.sort(key=lambda f: f.path.lower())
+ # Start with FILES_PROMPT and add attachments/editable_documents
system_prompt = FILES_PROMPT + "\n"
- if not files:
+ if not regular_files:
system_prompt += "\nNo files are currently available."
- for file in files:
+ for file in regular_files:
# Format permissions: -rw- for read_write, -r-- for read
permissions = "-rw-" if file.permission == "read_write" else "-r--"
# Use the file description as the summary, or provide a default message
@@ -487,6 +498,29 @@ async def _construct_filesystem_system_prompt(self) -> str:
else "No summary available yet, use the context available to determine the use of this file"
)
system_prompt += f"{permissions} {file.path} [File content summary: {summary}]\n"
+
+ # Add ARCHIVES_ADDON_PROMPT if there are archives
+ if archives_files:
+ system_prompt += "\n" + ARCHIVES_ADDON_PROMPT + "\n"
+
+ # order the archives files by timestamp, newest first
+ archives_files.sort(key=lambda f: f.timestamp, reverse=True)
+ # take the top 25 archives files
+ archives_files = archives_files[:25]
+ # order them alphabetically by path
+ archives_files.sort(key=lambda f: f.path.lower())
+
+ for file in archives_files:
+ # Format permissions: -rw- for read_write, -r-- for read
+ permissions = "-rw-" if file.permission == "read_write" else "-r--"
+ # Use the file description as the summary, or provide a default message
+ summary = (
+ file.description
+ if file.description
+ else "No summary available yet, use the context available to determine the use of this file"
+ )
+ system_prompt += f"{permissions} {file.path} [File content summary: {summary}]\n"
+
return system_prompt
def _override_edit_file_description(self, tools: list[ChatCompletionToolParam]) -> list[ChatCompletionToolParam]:
diff --git a/libraries/python/chat-context-toolkit/chat_context_toolkit/virtual_filesystem/_virtual_filesystem.py b/libraries/python/chat-context-toolkit/chat_context_toolkit/virtual_filesystem/_virtual_filesystem.py
index a302aff5..5fd5e703 100644
--- a/libraries/python/chat-context-toolkit/chat_context_toolkit/virtual_filesystem/_virtual_filesystem.py
+++ b/libraries/python/chat-context-toolkit/chat_context_toolkit/virtual_filesystem/_virtual_filesystem.py
@@ -47,7 +47,13 @@ def mounts(self) -> Iterable[MountPoint]:
return self._mounts.values()
def _split_path(self, path: str) -> tuple[str, str]:
- _, mount_path_segment, *source_path_segments = path.split("/")
+ path_segments = path.split("/")
+        if len(path_segments) < 2 or path_segments[0] != "":
+ raise ValueError(
+ f"Invalid path format: {path}. Path must start with '/' and contain at least one directory."
+ )
+
+ _, mount_path_segment, *source_path_segments = path_segments
mount_path = "/" + mount_path_segment
source_path = "/" + "/".join(source_path_segments)
return mount_path, source_path
diff --git a/libraries/python/chat-context-toolkit/chat_context_toolkit/virtual_filesystem/tools/_view_tool.py b/libraries/python/chat-context-toolkit/chat_context_toolkit/virtual_filesystem/tools/_view_tool.py
index 9be60ba1..77383c3c 100644
--- a/libraries/python/chat-context-toolkit/chat_context_toolkit/virtual_filesystem/tools/_view_tool.py
+++ b/libraries/python/chat-context-toolkit/chat_context_toolkit/virtual_filesystem/tools/_view_tool.py
@@ -54,6 +54,8 @@ async def execute(self, args: dict) -> str | Iterable[ChatCompletionContentPartT
file_content = await self.virtual_filesystem.read_file(path)
except FileNotFoundError:
return f"Error: File at path {path} not found. Please pay attention to the available files and try again."
+ except ValueError as e:
+ return f"Error: {str(e)}"
result = f'\n{file_content}\n'
return result
diff --git a/libraries/python/chat-context-toolkit/test/virtual_filesystem/tools/test_view_tool.py b/libraries/python/chat-context-toolkit/test/virtual_filesystem/tools/test_view_tool.py
index 7a6a666b..add70c9d 100644
--- a/libraries/python/chat-context-toolkit/test/virtual_filesystem/tools/test_view_tool.py
+++ b/libraries/python/chat-context-toolkit/test/virtual_filesystem/tools/test_view_tool.py
@@ -29,3 +29,18 @@ async def test_view_tool_error_handling():
result
== "Error: File at path /nonexistent.txt not found. Please pay attention to the available files and try again."
)
+
+
+async def test_view_tool_invalid_path_format():
+ """Test ViewTool.execute with invalid path formats."""
+ mock_vfs = MagicMock(spec=VirtualFileSystem)
+ mock_vfs.read_file.side_effect = ValueError(
+ "Invalid path format: Bair Haiku.md. Path must start with '/' and contain at least one directory."
+ )
+
+ view_tool = ViewTool(mock_vfs)
+ result = await view_tool.execute({"path": "Bair Haiku.md"})
+ assert (
+ result
+ == "Error: Invalid path format: Bair Haiku.md. Path must start with '/' and contain at least one directory."
+ )