-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathMakefile
More file actions
195 lines (171 loc) · 5.84 KB
/
Makefile
File metadata and controls
195 lines (171 loc) · 5.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Every target listed here is a command name, not a file its recipe produces.
# Declaring them phony keeps a stray file or directory of the same name
# (e.g. `env`, `check-env`, `clean-all`) from making the target look up to date.
# `venv` is left out: its recipe creates a directory with exactly that name, so
# it works as a real file target and is skipped once the directory exists.
.PHONY: help install setup env quick-start \
        run benchmark run-quick run-gemini run-gemini-quick run-scenario \
        test test-regular test-codemode test-sandbox \
        clean clean-all format lint check \
        show-results show-structure check-env
# Default target
# Prints the list of user-facing targets. As the first ordinary rule in the
# file it is what a bare `make` runs. Keep this listing in sync with the
# targets defined below (quick-start was previously missing from it).
help:
	@echo "Code Mode Benchmark - Available Commands"
	@echo "========================================"
	@echo ""
	@echo "Setup & Installation:"
	@echo " make install Install Python dependencies"
	@echo " make setup Complete setup (venv + deps + .env)"
	@echo " make quick-start Run 'make setup' and print next steps"
	@echo " make venv Create virtual environment"
	@echo " make env Create .env file from example"
	@echo ""
	@echo "Running:"
	@echo " make run Run full benchmark with Claude"
	@echo " make run-quick Run quick benchmark with Claude"
	@echo " make run-gemini Run full benchmark with Gemini"
	@echo " make run-gemini-quick Run quick benchmark with Gemini"
	@echo " make run-scenario Run specific scenario (SCENARIO=<id>)"
	@echo " make benchmark Alias for 'make run'"
	@echo ""
	@echo "Testing:"
	@echo " make test-regular Test regular agent only"
	@echo " make test-codemode Test code mode agent only"
	@echo " make test-sandbox Test sandbox executor"
	@echo " make test Run all tests"
	@echo ""
	@echo "Maintenance:"
	@echo " make clean Remove cache and generated files"
	@echo " make clean-all Remove cache, venv, and results"
	@echo " make format Format code with black"
	@echo " make lint Lint code with flake8"
	@echo " make check Run format + lint"
	@echo ""
	@echo "Info:"
	@echo " make show-results Display last benchmark results"
	@echo " make show-structure Show project structure"
# Installation targets
# Installs the packages listed in requirements.txt using whichever `pip` is
# first on PATH (the active environment's, if one is activated).
install:
	@printf '%s\n' "Installing dependencies..."
	pip install -r requirements.txt
	@printf '%s\n' "✓ Dependencies installed"
# Creates a virtual environment in ./venv with python3's built-in venv module,
# then prints the activation command.
venv:
	@printf '%s\n' "Creating virtual environment..."
	python3 -m venv venv
	@printf '%s\n' \
	  "✓ Virtual environment created" \
	  "" \
	  "To activate the virtual environment:" \
	  " source venv/bin/activate"
# Creates .env by copying .env.example, unless .env already exists (an existing
# file is never overwritten).
# The copy and the success messages are chained with `&&`: if `cp` fails (for
# example because .env.example is missing) the "Created" message is not printed
# and the target fails instead of reporting a success that did not happen.
env:
	@if [ -f .env ]; then \
	  echo ".env file already exists"; \
	else \
	  cp .env.example .env && \
	  echo "✓ Created .env file from .env.example" && \
	  echo "" && \
	  echo "⚠️ Please edit .env and add your ANTHROPIC_API_KEY"; \
	fi
# Full project bootstrap: builds the `venv` and `env` prerequisites first, then
# installs requirements.txt with the virtual environment's own pip and prints
# the remaining manual steps.
setup: venv env
	@printf '%s\n' "" "Installing dependencies in virtual environment..."
	./venv/bin/pip install -r requirements.txt
	@printf '%s\n' \
	  "" \
	  "✓ Setup complete!" \
	  "" \
	  "Next steps:" \
	  " 1. Activate the virtual environment: source venv/bin/activate" \
	  " 2. Edit .env and add your ANTHROPIC_API_KEY" \
	  " 3. Run the benchmark: make run"
# Running targets
# Runs benchmark.py with no extra arguments, once check-env has passed.
run: check-env
	@printf '%s\n' "Running full benchmark..."
	python benchmark.py

# Alias: `make benchmark` does exactly what `make run` does.
benchmark: run
# Runs benchmark.py with `--limit 2`, once check-env has passed.
run-quick: check-env
	@printf '%s\n' "Running quick benchmark (first 2 scenarios)..."
	python benchmark.py --limit 2
# Runs benchmark.py with `--model gemini`, once check-env has passed.
run-gemini: check-env
	@printf '%s\n' "Running benchmark with Gemini..."
	python benchmark.py --model gemini
# Runs benchmark.py with `--model gemini --limit 2`, once check-env has passed.
run-gemini-quick: check-env
	@printf '%s\n' "Running quick benchmark with Gemini..."
	python benchmark.py --model gemini --limit 2
# Runs a single scenario: `make run-scenario SCENARIO=<id>`.
#
# SCENARIO is validated before anything else is printed, and is stripped of
# surrounding whitespace so a blank value is rejected rather than producing
# `--scenario` with no argument. The value is passed to the shell inside double
# quotes so it reaches benchmark.py as a single argument and is not split into
# several words by the shell.
run-scenario: check-env
	@if [ -z "$(strip $(SCENARIO))" ]; then \
	  echo "Usage: make run-scenario SCENARIO=<id>"; \
	  echo "Example: make run-scenario SCENARIO=1"; \
	  exit 1; \
	fi
	@echo "Running specific scenario..."
	python benchmark.py --scenario "$(strip $(SCENARIO))"
# Testing targets
# Executes agents/regular_agent.py as a script, once check-env has passed.
test-regular: check-env
	@printf '%s\n' "Testing Regular Agent..."
	python agents/regular_agent.py
# Executes agents/codemode_agent.py as a script, once check-env has passed.
test-codemode: check-env
	@printf '%s\n' "Testing Code Mode Agent..."
	python agents/codemode_agent.py
# Executes sandbox/executor.py as a script. Unlike the agent tests this target
# has no check-env prerequisite.
test-sandbox:
	@printf '%s\n' "Testing Sandbox Executor..."
	python sandbox/executor.py
# Aggregate target: builds the three individual test targets, then prints a
# completion line.
test: test-sandbox test-regular test-codemode
	@printf '%s\n' "" "✓ All tests completed"
# Maintenance targets
# Removes Python caches, compiled files, logs and egg-info directories from the
# project tree.
#
# ./venv and ./.git are pruned from both searches: without that, the sweep
# reaches into the virtual environment and deletes `*.egg-info` metadata and
# `*.log` files belonging to installed packages. `clean-all` is the target that
# removes the virtual environment itself.
#
# The directory search uses -prune before -exec so find does not try to descend
# into a directory it is about to delete; that removes the "No such file or
# directory" noise, so stderr suppression and `|| true` are no longer needed
# and a real failure (e.g. permission denied) now fails the target.
# The file search uses `-exec rm -f` instead of -delete because -delete implies
# -depth, which disables -prune.
clean:
	@echo "Cleaning cache and temporary files..."
	find . \( -path ./venv -o -path ./.git \) -prune -o \
	  -type d \( -name "__pycache__" -o -name "*.egg-info" \) -prune -exec rm -rf {} +
	find . \( -path ./venv -o -path ./.git \) -prune -o \
	  -type f \( -name "*.pyc" -o -name "*.pyo" -o -name "*.log" \) -exec rm -f {} +
	@echo "✓ Cleaned"
# Deep clean: runs `clean` first, then deletes the venv directory and the
# benchmark_results.json file.
clean-all: clean
	@printf '%s\n' "Removing virtual environment and results..."
	rm -rf venv
	rm -f benchmark_results.json
	@printf '%s\n' "✓ Deep clean complete"
# Formats agents/, tools/, sandbox/ and benchmark.py with black.
#
# black and the success message are joined with `&&`, so "Code formatted" is
# printed only when black succeeds, and a black failure makes the target fail.
# A missing black is still only a warning (exit status 0), as before.
format:
	@echo "Formatting code with black..."
	@if command -v black >/dev/null 2>&1; then \
	  black agents/ tools/ sandbox/ benchmark.py && \
	  echo "✓ Code formatted"; \
	else \
	  echo "⚠️ black not installed. Install with: pip install black"; \
	fi
# Lints agents/, tools/, sandbox/ and benchmark.py with flake8
# (max line length 120; E203 and W503 ignored).
#
# flake8 and the success message are joined with `&&`, so "Linting passed" is
# printed only when flake8 exits 0, and lint violations make the target fail
# instead of being reported as a pass.
# A missing flake8 is still only a warning (exit status 0), as before.
lint:
	@echo "Linting code with flake8..."
	@if command -v flake8 >/dev/null 2>&1; then \
	  flake8 agents/ tools/ sandbox/ benchmark.py --max-line-length=120 --ignore=E203,W503 && \
	  echo "✓ Linting passed"; \
	else \
	  echo "⚠️ flake8 not installed. Install with: pip install flake8"; \
	fi
# Aggregate target: builds `format` and `lint`, then prints a completion line.
check: format lint
	@printf '%s\n' "✓ Code check complete"
# Info targets
# Pretty-prints the first 100 lines of benchmark_results.json via
# `python -m json.tool`, or prints a hint when the file does not exist.
show-results:
	@if [ ! -f benchmark_results.json ]; then \
	  echo "No results found. Run 'make benchmark' first."; \
	else \
	  printf '%s\n' "Last Benchmark Results:" "======================"; \
	  python -m json.tool benchmark_results.json | head -100; \
	fi
# Prints the project layout: a 3-level `tree` listing when tree is available,
# otherwise a sorted list of .py files.
#
# tree is probed with `command -v` (the same check format and lint use) so a
# missing tree no longer prints a "command not found" error before falling
# back. If tree is present but fails, the find listing is still used.
# The find fallback prunes ./venv and ./.git to match the directories the tree
# listing excludes; otherwise every .py file installed in the virtual
# environment would be listed.
show-structure:
	@echo "Project Structure:"
	@echo "=================="
	@{ command -v tree >/dev/null 2>&1 && \
	   tree -I '__pycache__|*.pyc|venv|.git' -L 3; } || \
	  find . \( -path ./venv -o -path ./.git \) -prune -o \
	    -type f -name "*.py" -print | grep -v __pycache__ | sort
# Utility targets
# Guard used as a prerequisite by the run and agent-test targets. Fails unless
# .env exists and contains at least one API key assignment.
#
# The key pattern is anchored to the start of a line (optionally preceded by
# whitespace and `export`), so a commented-out key does not count. For
# GOOGLE_API_KEY at least one value character is required, so an empty
# `GOOGLE_API_KEY=` or `GOOGLE_API_KEY=""` placeholder does not count either.
# An optional opening quote before the value is accepted. ANTHROPIC_API_KEY
# must still start with `sk-`, as before.
check-env:
	@if [ ! -f .env ]; then \
	  echo "⚠️ .env file not found"; \
	  echo "Run 'make env' to create it"; \
	  exit 1; \
	fi
	@if ! grep -qE "^[[:space:]]*(export[[:space:]]+)?(ANTHROPIC_API_KEY=[\"']?sk-|GOOGLE_API_KEY=[\"']?[^[:space:]\"'])" .env 2>/dev/null; then \
	  echo "⚠️ No API keys configured in .env"; \
	  echo "Please edit .env and add at least one API key:"; \
	  echo " - ANTHROPIC_API_KEY for Claude"; \
	  echo " - GOOGLE_API_KEY for Gemini"; \
	  exit 1; \
	fi
# Quick start - one command to set everything up
# Runs `setup`, then prints a reminder of the manual steps that remain.
quick-start: setup
	@printf '%s\n' \
	  "" \
	  "Quick start setup complete!" \
	  "" \
	  "⚠️ Don't forget to:" \
	  " 1. Activate venv: source venv/bin/activate" \
	  " 2. Add your API key to .env" \
	  " 3. Run: make benchmark"