Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
5fba7ac
refactron: backup before refactoring /Users/omsherikar/Refactron_Root…
omsherikar Feb 7, 2026
c5eacb8
refactron: backup before refactoring /Users/omsherikar/Refactron_Root…
omsherikar Feb 7, 2026
e9bb329
release: v1.0.15 - LLM/RAG integration, test fixes, ML foundation
omsherikar Feb 8, 2026
fdc51c7
fix: add missing GroqClient import in RAG indexer
omsherikar Feb 8, 2026
ebd6341
chore: exclude test repo artifacts from version control
omsherikar Feb 8, 2026
78bd8fc
security: fix incomplete URL sanitization vulnerability
omsherikar Feb 8, 2026
62dce26
fix: add Python 3.8 compatibility for tree-sitter Language API
omsherikar Feb 8, 2026
ebeebd0
style: format code with black
omsherikar Feb 8, 2026
5fed019
style: apply pre-commit auto-fixes
omsherikar Feb 8, 2026
1dd3ce8
fix: add noqa comments and cleanup linting issues
omsherikar Feb 8, 2026
adeade6
fix: resolve Python 3.8 compatibility issues in RAG parser and finger…
omsherikar Feb 8, 2026
d5db79e
Delete complex_test_repo/core/__init__.py
omsherikar Feb 8, 2026
957c782
Delete complex_test_repo/core/engine.py
omsherikar Feb 8, 2026
58009d7
Delete complex_test_repo/data/__init__.py
omsherikar Feb 8, 2026
bae9ef9
Delete complex_test_repo/data/processor.py
omsherikar Feb 8, 2026
3db4b3c
Delete complex_test_repo/utils/__init__.py
omsherikar Feb 8, 2026
a8bf2b3
Delete complex_test_repo/utils/math_lib.py
omsherikar Feb 8, 2026
2084221
Delete complex_test_repo/utils/string_helper.py
omsherikar Feb 8, 2026
a1e41d8
Delete complex_test_repo/main.py
omsherikar Feb 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,10 @@ refactron_incremental_state.json
# Exclude analysis files
DIRECTORY_ANALYSIS.md
FEATURES.md

# Test repository artifacts (generated during testing)
complex_test_repo/.rag/
complex_test_repo/.refactron.yaml
complex_test_repo/utils/math_lib_doc.md
feedback_analysis.json
verify_full_ecosystem.py
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "refactron"
version = "1.0.14"
version = "1.0.15"
description = "Python code analysis and refactoring tool with security scanning, performance detection, and automated fixes"
readme = "README.md"
requires-python = ">=3.8"
Expand Down Expand Up @@ -35,6 +35,14 @@ dependencies = [
"radon>=6.0.0",
"requests>=2.25.0",
"astroid>=3.0.0",
# RAG Infrastructure
"chromadb>=0.4.22",
"tree-sitter>=0.20.4",
"tree-sitter-python>=0.20.4",
"sentence-transformers>=2.5.1",
# LLM Integration (Free Cloud APIs)
"groq>=0.4.0", # Free cloud LLM (Llama 3, Mixtral)
"pydantic>=2.6.0",
]

[project.optional-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion refactron/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from refactron.core.refactor_result import RefactorResult
from refactron.core.refactron import Refactron

__version__ = "1.0.14"
__version__ = "1.0.15"
__author__ = "Om Sherikar"

__all__ = [
Expand Down
116 changes: 90 additions & 26 deletions refactron/analyzers/security_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import ast
import fnmatch
from pathlib import Path
from typing import List
from typing import Dict, List

from refactron.analyzers.base_analyzer import BaseAnalyzer
from refactron.core.models import CodeIssue, IssueCategory, IssueLevel
Expand All @@ -24,6 +24,8 @@ class SecurityAnalyzer(BaseAnalyzer):
"compile": "Potential code injection - use with extreme caution",
"__import__": "Dynamic imports can be dangerous - use importlib instead",
"input": "In Python 2, input() evaluates code - use raw_input() or upgrade to Python 3",
"system": "Command injection risk - uses a shell. Use subprocess.run() instead",
"popen": "Command injection risk - uses a shell. Use subprocess.Popen() with a list instead", # noqa: E501
}

# Dangerous modules
Expand Down Expand Up @@ -125,6 +127,9 @@ def analyze(self, file_path: Path, source_code: str) -> List[CodeIssue]:
try:
tree = ast.parse(source_code)

# Map of local names to full module/function paths (alias tracking)
self._alias_map = self._build_alias_map(tree)

# Check for various security issues
issues.extend(self._check_dangerous_functions(tree, file_path))
issues.extend(self._check_dangerous_imports(tree, file_path))
Expand All @@ -139,8 +144,20 @@ def analyze(self, file_path: Path, source_code: str) -> List[CodeIssue]:
issues.extend(self._check_insecure_random(tree, file_path))
issues.extend(self._check_weak_ssl_tls(tree, file_path))

except SyntaxError:
pass
except SyntaxError as e:
# Report syntax errors as security risks
issues.append(
CodeIssue(
category=IssueCategory.SECURITY,
level=IssueLevel.ERROR,
message=f"Syntax error prevents security analysis: {str(e)}",
file_path=file_path,
line_number=getattr(e, "lineno", 1),
suggestion="Fix the syntax error to enable automated security scanning.",
rule_id="SEC000",
confidence=1.0,
)
)

# Filter out whitelisted rules and low confidence issues
filtered_issues = []
Expand All @@ -155,6 +172,21 @@ def analyze(self, file_path: Path, source_code: str) -> List[CodeIssue]:

return filtered_issues

def _build_alias_map(self, tree: ast.AST) -> Dict[str, str]:
"""Build a map of local names to their full qualified names (alias tracking)."""
aliases = {}
for node in ast.walk(tree):
if isinstance(node, ast.Import):
for alias in node.names:
if alias.asname:
aliases[alias.asname] = alias.name
elif isinstance(node, ast.ImportFrom) and node.module:
for alias in node.names:
local_name = alias.asname if alias.asname else alias.name
if local_name != "*":
aliases[local_name] = f"{node.module}.{alias.name}"
return aliases

def _check_dangerous_functions(self, tree: ast.AST, file_path: Path) -> List[CodeIssue]:
"""Check for dangerous built-in functions."""
issues = []
Expand Down Expand Up @@ -351,33 +383,60 @@ def _check_command_injection(self, tree: ast.AST, file_path: Path) -> List[CodeI
"""Check for command injection vulnerabilities."""
issues = []

dangerous_calls = ["os.system", "subprocess.call", "subprocess.Popen", "os.popen"]
# Functions that always use a shell and are dangerous
always_shell = ["os.system", "os.popen", "system", "popen"]
# Functions that are dangerous specifically when shell=True is passed
shell_optional = [
"subprocess.call",
"subprocess.Popen",
"subprocess.run",
"subprocess.check_call",
"subprocess.check_output",
]

for node in ast.walk(tree):
if isinstance(node, ast.Call):
func_name = self._get_full_function_name(node.func)

if any(dangerous in func_name for dangerous in dangerous_calls):
# Check if shell=True is used
# Check for "always shell" functions
if any(
dangerous == func_name or func_name.endswith(f".{dangerous}")
for dangerous in always_shell
):
Comment on lines +402 to +405
Copy link

Copilot AI Feb 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

always_shell includes bare names like "system"/"popen", and the matching logic flags any call whose resolved name ends with .system/.popen. This will produce false positives for unrelated methods (e.g., obj.system()). Since you now build an alias map, you can restrict detection to fully-qualified os.system/os.popen (including aliases that resolve to those) and drop the generic suffix matching.

Suggested change
if any(
dangerous == func_name or func_name.endswith(f".{dangerous}")
for dangerous in always_shell
):
if func_name in always_shell:

Copilot uses AI. Check for mistakes.
issue = CodeIssue(
category=IssueCategory.SECURITY,
level=IssueLevel.CRITICAL,
message=f"Command injection risk: {func_name}() uses a shell",
file_path=file_path,
line_number=node.lineno,
suggestion="Avoid functions that use a shell. Use subprocess.run() with a list of arguments instead.", # noqa: E501
rule_id="SEC0051",
confidence=0.95,
)
issues.append(issue)
continue

# Check for "shell=True" in optional functions
if any(dangerous in func_name for dangerous in shell_optional):
is_shell_true = False
for keyword in node.keywords:
if keyword.arg == "shell" and isinstance(keyword.value, ast.Constant):
if keyword.value.value is True:
issue = CodeIssue(
category=IssueCategory.SECURITY,
level=IssueLevel.CRITICAL,
message=(
f"Command injection risk: {func_name}() with shell=True"
),
file_path=file_path,
line_number=node.lineno,
suggestion=(
"Avoid shell=True. Use subprocess with list of arguments "
"instead"
),
rule_id="SEC005",
confidence=0.95,
)
issues.append(issue)
is_shell_true = True
break

if is_shell_true:
issue = CodeIssue(
category=IssueCategory.SECURITY,
level=IssueLevel.CRITICAL,
message=f"Command injection risk: {func_name}() with shell=True",
file_path=file_path,
line_number=node.lineno,
suggestion="Avoid shell=True. Use subprocess with list of arguments instead.", # noqa: E501
rule_id="SEC0052",
confidence=0.95,
)
issues.append(issue)

return issues

Expand Down Expand Up @@ -462,13 +521,16 @@ def _get_function_name(self, node: ast.AST) -> str:
return ""

def _get_full_function_name(self, node: ast.AST) -> str:
"""Get full qualified function name (e.g., 'os.system')."""
"""Get full qualified function name (e.g., 'os.system'), resolving aliases."""
if isinstance(node, ast.Name):
return node.id
# Resolve alias if it exists
return self._alias_map.get(node.id, node.id)
elif isinstance(node, ast.Attribute):
value_name = self._get_full_function_name(node.value)
if value_name:
return f"{value_name}.{node.attr}"
full_name = f"{value_name}.{node.attr}"
# Check for secondary aliases (e.g., 'o.system' where 'o' is 'os')
return self._alias_map.get(full_name, full_name)
return node.attr
return ""

Expand All @@ -483,13 +545,15 @@ def _check_sql_parameterization(self, tree: ast.AST, file_path: Path) -> List[Co
if isinstance(node, ast.Assign):
for target in node.targets:
if isinstance(target, ast.Name):
# Check if the assignment uses string concatenation or .format()
# Check if the assignment uses string concatenation, f-strings, or .format()
if isinstance(node.value, ast.BinOp) and isinstance(node.value.op, ast.Add):
# Check if at least one side is a string
if isinstance(node.value.left, ast.Constant) or isinstance(
node.value.right, ast.Constant
):
string_concat_vars[target.id] = node.lineno
elif isinstance(node.value, ast.JoinedStr): # f-string
string_concat_vars[target.id] = node.lineno
elif isinstance(node.value, ast.Call) and isinstance(
node.value.func, ast.Attribute
):
Expand Down
Loading
Loading