Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions agent-brain-cli/agent_brain_cli/commands/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"csharp": ["*.cs"],
"c": ["*.c", "*.h"],
"cpp": ["*.cpp", "*.hpp", "*.cc", "*.hh"],
"pascal": ["*.pas", "*.pp", "*.lpr", "*.dpr"],
"web": ["*.html", "*.css", "*.scss", "*.jsx", "*.tsx"],
"docs": ["*.md", "*.txt", "*.rst", "*.pdf"],
"text": ["*.md", "*.txt", "*.rst"],
Expand All @@ -50,6 +51,10 @@
"*.hpp",
"*.cc",
"*.hh",
"*.pas",
"*.pp",
"*.lpr",
"*.dpr",
],
}

Expand Down
8 changes: 8 additions & 0 deletions agent-brain-cli/tests/test_types_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def test_all_expected_presets_exist(self) -> None:
"csharp",
"c",
"cpp",
"pascal",
"web",
"docs",
"text",
Expand All @@ -136,6 +137,13 @@ def test_docs_preset_patterns(self) -> None:
assert "*.md" in FILE_TYPE_PRESETS["docs"]
assert "*.pdf" in FILE_TYPE_PRESETS["docs"]

def test_pascal_preset_patterns(self) -> None:
    """Verify the Pascal preset lists every supported source extension.

    Covers unit files (``*.pas``, ``*.pp``) and project files
    (``*.lpr``, ``*.dpr``).
    """
    pascal_patterns = FILE_TYPE_PRESETS["pascal"]
    for expected in ("*.pas", "*.pp", "*.lpr", "*.dpr"):
        assert expected in pascal_patterns

def test_code_preset_is_superset(self) -> None:
"""Test that 'code' preset contains all language patterns."""
code_patterns = set(FILE_TYPE_PRESETS["code"])
Expand Down
85 changes: 84 additions & 1 deletion agent-brain-server/agent_brain_server/indexing/chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,7 @@ def _setup_language(self) -> None:
"cpp": "cpp",
"c": "c",
"csharp": "csharp",
"pascal": "pascal",
}

lang_id = lang_map.get(self.language)
Expand Down Expand Up @@ -486,7 +487,7 @@ def _get_symbols(self, text: str) -> list[dict[str, Any]]:
logger.error(f"Failed to parse AST: {e}")
return []

symbols = []
symbols: list[dict[str, Any]] = []

# Define queries for common languages
query_str = ""
Expand Down Expand Up @@ -557,6 +558,15 @@ def _get_symbols(self, text: str) -> list[dict[str, Any]]:
(namespace_declaration
name: (identifier) @name) @symbol
"""
elif self.language == "pascal":
# Pascal: use manual AST walking because qualified method names
# (TClass.Method via genericDot) can't be unambiguously captured
# with a single tree-sitter query capture.
try:
self._collect_pascal_symbols(root, symbols)
except Exception as e:
logger.error(f"Error querying AST for {self.language}: {e}")
return symbols

if not query_str:
return []
Expand Down Expand Up @@ -640,6 +650,79 @@ def _extract_xml_doc_comment(self, text: str, declaration_line: int) -> str | No

return plain_text if plain_text else None

def _collect_pascal_symbols(
    self,
    node: tree_sitter.Node,
    symbols: list[dict[str, Any]],
) -> None:
    """Walk a Pascal AST and collect procedure/function/type symbols.

    The tree is traversed depth-first in document (pre-)order using an
    explicit stack rather than recursion. A recursive walk consumes one
    Python frame per AST level, so a deeply nested tree (for example a very
    long chained expression) could raise ``RecursionError`` and leave the
    caller with a truncated symbol list.

    Two node types produce a symbol:
    - ``defProc``: a procedure/function implementation. The name is taken
      from its first ``declProc`` child via ``_pascal_proc_name``.
    - ``declType``: a type declaration. The name is the text of its first
      direct ``identifier`` child.

    Each symbol dict has the keys ``name``, ``kind`` (the node type),
    ``start_line`` and ``end_line`` (1-based, derived from the node's
    0-based ``start_point``/``end_point`` rows).

    Args:
        node: Tree-sitter AST node to start the walk from.
        symbols: Accumulator list that symbol dicts are appended to.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        name: str | None = None

        if current.type == "defProc":
            # Only the first declProc child (the routine header) is consulted.
            header = next(
                (c for c in current.children if c.type == "declProc"), None
            )
            if header is not None:
                name = self._pascal_proc_name(header)
        elif current.type == "declType":
            # Only the first direct identifier child names the type.
            ident = next(
                (c for c in current.children if c.type == "identifier"), None
            )
            if ident is not None:
                raw = ident.text
                if raw:
                    name = raw.decode("utf-8")

        if name:
            symbols.append(
                {
                    "name": name,
                    "kind": current.type,
                    "start_line": current.start_point[0] + 1,
                    "end_line": current.end_point[0] + 1,
                }
            )

        # Children are still visited after a match so nested routines and
        # types are found. Pushing them reversed makes the stack pop them
        # left-to-right, preserving document order in ``symbols``.
        pending.extend(reversed(current.children))

def _pascal_proc_name(self, decl_proc: tree_sitter.Node) -> str | None:
    """Return the unqualified routine name held by a ``declProc`` node.

    The direct children are scanned in order and the first usable name wins:

    - A direct ``identifier`` child (``procedure Foo;``) yields its text,
      or ``None`` when that text is empty.
    - A ``genericDot`` child (``procedure TClass.Method;``) yields the text
      of its last direct ``identifier`` child, i.e. the method name. If it
      has no such child, or the text is empty, scanning continues.

    Args:
        decl_proc: The ``declProc`` node to inspect.

    Returns:
        The decoded name, or ``None`` if no identifier was found.
    """
    for part in decl_proc.children:
        kind = part.type

        if kind == "identifier":
            text = part.text
            if not text:
                return None
            return text.decode("utf-8")

        if kind != "genericDot":
            continue

        # Qualified name: the trailing identifier is the method itself.
        idents = [seg for seg in part.children if seg.type == "identifier"]
        if not idents:
            continue
        tail = idents[-1].text
        if tail:
            return tail.decode("utf-8")

    return None

def count_tokens(self, text: str) -> int:
    """Return the length of ``self.tokenizer.encode(text)``."""
    token_ids = self.tokenizer.encode(text)
    return len(token_ids)
Expand Down
26 changes: 24 additions & 2 deletions agent-brain-server/agent_brain_server/indexing/document_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ class LanguageDetector:
"""
Utility for detecting programming languages from file paths and content.

Supports the 10 languages with tree-sitter parsers:
- Python, TypeScript, JavaScript, Kotlin, C, C++, Java, Go, Rust, Swift
Supports languages with tree-sitter parsers (AST-aware chunking):
- Python, TypeScript, JavaScript, Go, Rust, Java, C, C++, C#, Pascal

Also detects Kotlin and Swift by extension/content (no AST chunking).
"""

# Language detection by file extension
Expand Down Expand Up @@ -76,6 +78,11 @@ class LanguageDetector:
# C#
".cs": "csharp",
".csx": "csharp",
# Object Pascal / Delphi / Free Pascal / Lazarus
".pas": "pascal",
".pp": "pascal",
".lpr": "pascal",
".dpr": "pascal",
}

# Language detection by content patterns (fallback)
Expand Down Expand Up @@ -141,6 +148,17 @@ class LanguageDetector:
re.MULTILINE,
),
],
"pascal": [
re.compile(
r"^\s*(unit|program|library)\s+\w+\s*;",
re.MULTILINE | re.IGNORECASE,
),
re.compile(
r"^\s*(procedure|function)\s+\w+",
re.MULTILINE | re.IGNORECASE,
),
re.compile(r"\bbegin\b", re.MULTILINE | re.IGNORECASE),
],
}

@classmethod
Expand Down Expand Up @@ -280,6 +298,10 @@ class DocumentLoader:
".swift", # Swift
".cs",
".csx", # C#
".pas",
".pp",
".lpr",
".dpr", # Object Pascal / Delphi / Free Pascal
}

SUPPORTED_EXTENSIONS: set[str] = DOCUMENT_EXTENSIONS | CODE_EXTENSIONS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"csharp": ["*.cs"],
"c": ["*.c", "*.h"],
"cpp": ["*.cpp", "*.hpp", "*.cc", "*.hh"],
"pascal": ["*.pas", "*.pp", "*.lpr", "*.dpr"],
"web": ["*.html", "*.css", "*.scss", "*.jsx", "*.tsx"],
"docs": ["*.md", "*.txt", "*.rst", "*.pdf"],
"text": ["*.md", "*.txt", "*.rst"],
Expand Down Expand Up @@ -53,6 +54,11 @@
"*.hpp",
"*.cc",
"*.hh",
# pascal
"*.pas",
"*.pp",
"*.lpr",
"*.dpr",
],
}

Expand Down
119 changes: 119 additions & 0 deletions agent-brain-server/tests/fixtures/sample.pas
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
{ Agent Brain - Sample Object Pascal unit for AST chunking tests }

unit SampleUnit;

interface

uses
SysUtils;

const
MAX_ITEMS = 100;
DEFAULT_COLOR = 'red';

type
TDirection = (dNorth, dSouth, dEast, dWest);

TPoint = record
X: Integer;
Y: Integer;
end;

TShape = class
private
FColor: string;
FVisible: Boolean;
public
constructor Create(const AColor: string);
destructor Destroy; override;
procedure SetColor(const AColor: string);
function GetColor: string;
function IsVisible: Boolean;
procedure Draw; virtual;
end;

TCircle = class(TShape)
private
FRadius: Double;
public
constructor Create(const AColor: string; ARadius: Double);
function GetArea: Double;
procedure Draw; override;
end;

function CalculateDistance(const A, B: TPoint): Double;
procedure PrintPoint(const P: TPoint);

implementation

uses
Math;

{ TShape implementation }

constructor TShape.Create(const AColor: string);
begin
FColor := AColor;
FVisible := True;
end;

destructor TShape.Destroy;
begin
inherited Destroy;
end;

procedure TShape.SetColor(const AColor: string);
begin
FColor := AColor;
end;

function TShape.GetColor: string;
begin
Result := FColor;
end;

function TShape.IsVisible: Boolean;
begin
Result := FVisible;
end;

procedure TShape.Draw;
begin
WriteLn('Drawing shape with color: ', FColor);
end;

{ TCircle implementation }

constructor TCircle.Create(const AColor: string; ARadius: Double);
begin
inherited Create(AColor);
FRadius := ARadius;
end;

function TCircle.GetArea: Double;
begin
Result := Pi * FRadius * FRadius;
end;

procedure TCircle.Draw;
begin
WriteLn('Drawing circle with radius: ', FRadius:0:2);
end;

{ Standalone routines }

function CalculateDistance(const A, B: TPoint): Double;
var
DX, DY: Double;
begin
DX := B.X - A.X;
DY := B.Y - A.Y;
Result := Sqrt(DX * DX + DY * DY);
end;

procedure PrintPoint(const P: TPoint);
begin
WriteLn(Format('Point(%d, %d)', [P.X, P.Y]));
end;

end.
6 changes: 4 additions & 2 deletions agent-brain-server/tests/test_file_type_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def test_code_preset_includes_all_languages(self) -> None:
"csharp",
"c",
"cpp",
"pascal",
]
for lang in language_presets:
lang_patterns = resolve_file_types([lang])
Expand Down Expand Up @@ -175,8 +176,8 @@ def test_c_preset_patterns(self) -> None:
result = resolve_file_types(["c"])
assert result == ["*.c", "*.h"]

def test_all_14_presets_exist(self) -> None:
"""Test that all 14 expected presets are defined."""
def test_all_15_presets_exist(self) -> None:
"""Test that all 15 expected presets are defined."""
expected_presets = {
"python",
"javascript",
Expand All @@ -187,6 +188,7 @@ def test_all_14_presets_exist(self) -> None:
"csharp",
"c",
"cpp",
"pascal",
"web",
"docs",
"code",
Expand Down
Loading