# Makefile for the Code Mode Benchmark (imran31415/codemode_python_benchmark)

# Every command-style target is declared phony so that a stray file or
# directory with the same name can never make it look "up to date".
# `venv` is deliberately not listed: it names the directory its recipe
# creates, so an existing virtual environment is not rebuilt.
.PHONY: help install setup env quick-start \
        run benchmark run-quick run-gemini run-gemini-quick run-scenario \
        test test-regular test-codemode test-sandbox \
        clean clean-all lint format check \
        show-results show-structure check-env

# Default target
# Print the list of user-facing targets. This is the first rule in the file,
# so a bare `make` runs it. Keep the list in sync with the rules below.
help:
	@echo "Code Mode Benchmark - Available Commands"
	@echo "========================================"
	@echo ""
	@echo "Setup & Installation:"
	@echo "  make install          Install Python dependencies"
	@echo "  make setup            Complete setup (venv + deps + .env)"
	@echo "  make quick-start      Run 'make setup' and print next steps"
	@echo "  make venv             Create virtual environment"
	@echo "  make env              Create .env file from example"
	@echo ""
	@echo "Running:"
	@echo "  make run              Run full benchmark with Claude"
	@echo "  make run-quick        Run quick benchmark with Claude"
	@echo "  make run-gemini       Run full benchmark with Gemini"
	@echo "  make run-gemini-quick Run quick benchmark with Gemini"
	@echo "  make run-scenario     Run specific scenario (SCENARIO=<id>)"
	@echo "  make benchmark        Alias for 'make run'"
	@echo ""
	@echo "Testing:"
	@echo "  make test-regular     Test regular agent only"
	@echo "  make test-codemode    Test code mode agent only"
	@echo "  make test-sandbox     Test sandbox executor"
	@echo "  make test             Run all tests"
	@echo ""
	@echo "Maintenance:"
	@echo "  make clean            Remove cache and generated files"
	@echo "  make clean-all        Remove cache, venv, and results"
	@echo "  make format           Format code with black"
	@echo "  make lint             Lint code with flake8"
	@echo "  make check            Run format + lint"
	@echo ""
	@echo "Info:"
	@echo "  make show-results     Display last benchmark results"
	@echo "  make show-structure   Show project structure"

# Installation targets
# Install the packages listed in requirements.txt using whichever `pip`
# is first on PATH.
install:
	@printf '%s\n' "Installing dependencies..."
	pip install -r requirements.txt
	@printf '%s\n' "✓ Dependencies installed"

# Create a virtual environment in ./venv and print how to activate it.
venv:
	@printf '%s\n' "Creating virtual environment..."
	python3 -m venv venv
	@printf '%s\n' \
		"✓ Virtual environment created" \
		"" \
		"To activate the virtual environment:" \
		"  source venv/bin/activate"

# Create .env from .env.example unless .env already exists.
# The copy and the success messages are chained with `&&` so that a failed
# copy (for example, .env.example is missing) fails the target instead of
# printing "Created" and exiting 0.
env:
	@if [ -f .env ]; then \
		echo ".env file already exists"; \
	else \
		cp .env.example .env && \
		echo "✓ Created .env file from .env.example" && \
		echo "" && \
		echo "⚠️  Please edit .env and add your ANTHROPIC_API_KEY"; \
	fi

# Full bootstrap: after `venv` and `env` have run, install requirements.txt
# with the virtual environment's own pip and print the follow-up steps.
setup: venv env
	@printf '\n%s\n' "Installing dependencies in virtual environment..."
	./venv/bin/pip install -r requirements.txt
	@printf '%s\n' \
		"" \
		"✓ Setup complete!" \
		"" \
		"Next steps:" \
		"  1. Activate the virtual environment: source venv/bin/activate" \
		"  2. Edit .env and add your ANTHROPIC_API_KEY" \
		"  3. Run the benchmark: make run"

# Running targets
# Run benchmark.py with no extra arguments, once check-env has passed.
run: check-env
	@printf '%s\n' "Running full benchmark..."
	python benchmark.py

# `benchmark` has no recipe of its own; it only triggers `run`.
benchmark: run

# Run benchmark.py with `--limit 2`, once check-env has passed.
run-quick: check-env
	@printf '%s\n' "Running quick benchmark (first 2 scenarios)..."
	python benchmark.py --limit 2

# Run benchmark.py with `--model gemini`, once check-env has passed.
run-gemini: check-env
	@printf '%s\n' "Running benchmark with Gemini..."
	python benchmark.py --model gemini

# Run benchmark.py with `--model gemini --limit 2`, once check-env has passed.
run-gemini-quick: check-env
	@printf '%s\n' "Running quick benchmark with Gemini..."
	python benchmark.py --model gemini --limit 2

# Run one scenario, selected with `make run-scenario SCENARIO=<id>`.
# An empty SCENARIO prints the usage text and fails the target with status 1.
run-scenario: check-env
	@printf '%s\n' "Running specific scenario..."
	@[ -n "$(SCENARIO)" ] || { \
		printf '%s\n' \
			"Usage: make run-scenario SCENARIO=<id>" \
			"Example: make run-scenario SCENARIO=1"; \
		exit 1; \
	}
	python benchmark.py --scenario $(SCENARIO)

# Testing targets
# Execute agents/regular_agent.py as a script, once check-env has passed.
test-regular: check-env
	@printf '%s\n' "Testing Regular Agent..."
	python agents/regular_agent.py

# Execute agents/codemode_agent.py as a script, once check-env has passed.
test-codemode: check-env
	@printf '%s\n' "Testing Code Mode Agent..."
	python agents/codemode_agent.py

# Execute sandbox/executor.py as a script. Unlike the agent tests, this
# target does not depend on check-env.
test-sandbox:
	@printf '%s\n' "Testing Sandbox Executor..."
	python sandbox/executor.py

# Aggregate target: the three test-* prerequisites run first; the summary
# line is printed only once they have all succeeded.
test: test-sandbox test-regular test-codemode
	@printf '\n%s\n' "✓ All tests completed"

# Maintenance targets
# Remove Python caches, compiled files, logs and egg-info directories from
# the project tree.
#
# ./venv and ./.git are pruned from the search: walking into them would
# delete the *.egg-info metadata of packages installed in the virtual
# environment and any *.log-named file kept under .git.
#
# The matched directories are pruned as well, so find never tries to descend
# into a directory that rm has just removed; no error suppression is needed
# and a real failure (e.g. a permission error) is reported.
#
# `-exec rm` is used rather than `-delete` because `-delete` implies `-depth`,
# which disables `-prune`.
clean:
	@echo "Cleaning cache and temporary files..."
	find . \( -path ./venv -o -path ./.git \) -prune -o \
		-type d \( -name "__pycache__" -o -name "*.egg-info" \) -prune -exec rm -rf {} +
	find . \( -path ./venv -o -path ./.git \) -prune -o \
		-type f \( -name "*.pyc" -o -name "*.pyo" -o -name "*.log" \) -exec rm -f {} +
	@echo "✓ Cleaned"

# Deep clean: everything `clean` removes, plus the ./venv directory and
# benchmark_results.json.
clean-all: clean
	@printf '%s\n' "Removing virtual environment and results..."
	rm -rf venv
	rm -f benchmark_results.json
	@printf '%s\n' "✓ Deep clean complete"

# Format agents/, tools/, sandbox/ and benchmark.py with black.
# A missing black only prints a warning (the target still succeeds).
# When black is present, its exit status is propagated: the success message
# is chained with `&&` so a formatting error fails the target instead of
# being hidden behind "Code formatted".
format:
	@echo "Formatting code with black..."
	@if command -v black >/dev/null 2>&1; then \
		black agents/ tools/ sandbox/ benchmark.py && \
		echo "✓ Code formatted"; \
	else \
		echo "⚠️  black not installed. Install with: pip install black"; \
	fi

# Lint agents/, tools/, sandbox/ and benchmark.py with flake8
# (max line length 120; E203 and W503 ignored).
# A missing flake8 only prints a warning (the target still succeeds).
# When flake8 is present, "Linting passed" is chained with `&&` so it is
# printed only on a clean run, and lint violations fail the target (and
# therefore `make check`).
lint:
	@echo "Linting code with flake8..."
	@if command -v flake8 >/dev/null 2>&1; then \
		flake8 agents/ tools/ sandbox/ benchmark.py --max-line-length=120 --ignore=E203,W503 && \
		echo "✓ Linting passed"; \
	else \
		echo "⚠️  flake8 not installed. Install with: pip install flake8"; \
	fi

# Aggregate target: `format` and `lint` are prerequisites; the summary line
# is printed after both have completed.
check: format lint
	@printf '%s\n' "✓ Code check complete"

# Info targets
# Pretty-print the first 100 lines of benchmark_results.json, or point the
# user at `make benchmark` when the file does not exist yet.
show-results:
	@if [ ! -f benchmark_results.json ]; then \
		printf '%s\n' "No results found. Run 'make benchmark' first."; \
	else \
		printf '%s\n' "Last Benchmark Results:" "======================"; \
		python -m json.tool benchmark_results.json | head -100; \
	fi

# Show the project layout.
# With `tree` installed: a 3-level tree that ignores __pycache__, *.pyc,
# venv and .git.
# Without it: a sorted list of *.py files. The fallback prunes the same
# venv, .git and __pycache__ directories, so it does not list every module
# installed in the virtual environment.
# `command -v` selects the branch up front, so a missing `tree` no longer
# prints a "command not found" error before falling back.
show-structure:
	@echo "Project Structure:"
	@echo "=================="
	@if command -v tree >/dev/null 2>&1; then \
		tree -I '__pycache__|*.pyc|venv|.git' -L 3; \
	else \
		find . \( -name venv -o -name .git -o -name __pycache__ \) -prune -o \
			-type f -name "*.py" -print | sort; \
	fi

# Utility targets
# Guard used as a prerequisite by the run-* and agent test targets.
# Fails with status 1 when .env is missing, or when it contains neither an
# ANTHROPIC_API_KEY starting with "sk-" nor a non-empty GOOGLE_API_KEY.
#
# The pattern is anchored to the start of the line (optional leading
# whitespace and an optional `export`), so commented-out assignments and
# names that merely end in *_API_KEY do not count. GOOGLE_API_KEY must have
# at least one value character, so an empty `GOOGLE_API_KEY=` does not
# count. An opening quote before the value is accepted.
# NOTE(review): any one key satisfies the check regardless of which model
# the calling target selects — confirm that is intended.
check-env:
	@if [ ! -f .env ]; then \
		echo "⚠️  .env file not found"; \
		echo "Run 'make env' to create it"; \
		exit 1; \
	fi
	@if ! grep -qE "^[[:space:]]*(export[[:space:]]+)?(ANTHROPIC_API_KEY=[\"']?sk-|GOOGLE_API_KEY=[\"']?[^[:space:]\"'])" .env 2>/dev/null; then \
		echo "⚠️  No API keys configured in .env"; \
		echo "Please edit .env and add at least one API key:"; \
		echo "  - ANTHROPIC_API_KEY for Claude"; \
		echo "  - GOOGLE_API_KEY for Gemini"; \
		exit 1; \
	fi

# Quick start - one command to set everything up
# Convenience wrapper: runs `setup`, then reminds the user of the manual
# steps that remain.
quick-start: setup
	@printf '%s\n' \
		"" \
		"Quick start setup complete!" \
		"" \
		"⚠️  Don't forget to:" \
		"  1. Activate venv: source venv/bin/activate" \
		"  2. Add your API key to .env" \
		"  3. Run: make benchmark"