Skip to content

Commit a316c8f

Browse files
committed
Create a template for python analysis
Signed-off-by: Rahul Krishna <i.m.ralk@gmail.com>
1 parent 5be2b65 commit a316c8f

File tree

11 files changed

+1088
-0
lines changed

11 files changed

+1088
-0
lines changed

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.12

NOTICE

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
This project includes components that are subject to additional license terms.
2+
3+
--- CodeQL License Notice ---
4+
5+
This project uses CodeQL, a product of GitHub, Inc.
6+
7+
CodeQL is licensed under the GitHub CodeQL Terms and Conditions:
8+
https://securitylab.github.com/tools/codeql/license
9+
10+
Unless you have a separate agreement with GitHub, you may only use CodeQL under one of the following conditions:
11+
12+
- You are an individual using CodeQL for **personal, non-commercial research**.
13+
- You are using CodeQL as part of an **open source project** hosted on GitHub.com, and your project is not directed by a for-profit company or intended for commercial use.
14+
15+
Any other use of CodeQL, including its use on proprietary code or in closed-source or commercial projects, requires a separate commercial license from GitHub.
16+
17+
You can view the full CodeQL license at:
18+
https://github.com/github/codeql/blob/main/LICENSE

pyproject.toml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
[project]
2+
name = "codeanalyzer"
3+
version = "0.1.0"
4+
description = "Static Analysis on Python source code using Jedi, CodeQL and Tree-sitter."
5+
readme = "README.md"
6+
authors = [
7+
{ name = "Rahul Krishna", email = "i.m.ralk@gmail.com" }
8+
]
9+
requires-python = ">=3.12"
10+
11+
dependencies = [
12+
"toml>=0.10.2",
13+
"typer>=0.16.0",
14+
]
15+
16+
[dependency-groups]
17+
test = [
18+
"pytest>=8.3.5",
19+
"pytest-asyncio>=1.0.0",
20+
"pytest-cov>=6.1.1",
21+
"pytest-pspec>=0.0.4",
22+
]
23+
dev = [
24+
"ipdb>=0.13.13",
25+
]
26+
27+
[project.scripts]
28+
codeanalyzer = "codeanalyzer.__main__:app"
29+
30+
[build-system]
31+
requires = ["hatchling"]
32+
build-backend = "hatchling.build"
33+
34+
[tool.hatch.build]
35+
# Tell hatchling to use the src layout
36+
sources = ["src"]
37+
38+
[tool.hatch.build.targets.wheel]
39+
packages = ["src/codeanalyzer"]
40+
include = ["src/codeanalyzer/py.typed"]
41+
42+
[tool.hatch.build.targets.sdist]
43+
include = [
44+
"src/codeanalyzer",
45+
"src/codeanalyzer/py.typed",
46+
"README.md",
47+
"LICENSE",
48+
"NOTICE"
49+
]

src/codeanalyzer/__init__.py

Whitespace-only changes.

src/codeanalyzer/__main__.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import typer
2+
from typing import *
3+
from pathlib import Path
4+
5+
# Settings for the command-line application object. They are collected in a
# mapping first so each option can carry its own explanatory comment.
_APP_SETTINGS = {
    # Name and summary shown in the generated help output.
    "name": "codeanalyzer",
    "help": "Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.",
    # Invoking the program without arguments prints the help text.
    "no_args_is_help": True,
    # Shell-completion installation options are not exposed.
    "add_completion": False,
    # Help strings are rendered with Rich markup.
    "rich_markup_mode": "rich",
    # Local variables are left out of pretty-printed tracebacks.
    "pretty_exceptions_show_locals": False,
}

app = typer.Typer(**_APP_SETTINGS)
13+
14+
15+
@app.command()
def main(
    input: Annotated[
        Path,
        typer.Option("-i", "--input", help="Path to the project root directory."),
    ],
    output: Annotated[
        Optional[Path],
        typer.Option(
            "-o",
            "--output",
            help="Destination directory to save the output graphs. By default, the SDG formatted as a JSON will be printed to the console.",
        ),
    ] = None,
    analysis_level: Annotated[
        int,
        typer.Option(
            "-a",
            "--analysis-level",
            help="Level of analysis to perform. Options: 1 (for just symbol table) or 2 (for call graph). Default: 1",
        ),
    ] = 1,
    verbose: Annotated[
        bool,
        typer.Option("-v", "--verbose", help="Print logs to console."),
    ] = False,
    target_files: Annotated[
        Optional[List[str]],
        typer.Option(
            "-t",
            "--target-files",
            help="For each file user wants to perform source analysis on top of existing analysis.json",
        ),
    ] = None,
):
    """
    Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.
    """
    # Template stub: the options above are declared and parsed, but no
    # analysis is wired in yet, so the command simply returns.
    return None
27+
28+
29+
if __name__ == "__main__":
30+
app()
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
################################################################################
2+
# Copyright IBM Corporation 2024
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
################################################################################
16+
17+
"""
18+
CodeQL package
19+
"""
20+
21+
from .codeql import JCodeQL
22+
23+
__all__ = ["JCodeQL"]

src/codeanalyzer/codeql/backend.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
################################################################################
2+
# Copyright IBM Corporation 2024
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
################################################################################
16+
17+
"""
18+
Backend module
19+
"""
20+
21+
import subprocess
22+
import tempfile
23+
from pathlib import Path
24+
import shlex
25+
from typing import List
26+
import pandas as pd
27+
from pandas import DataFrame
28+
29+
from cldk.utils.exceptions import CodeQLQueryExecutionException
30+
31+
32+
class CodeQLQueryRunner:
    """
    A context manager for executing CodeQL queries against a CodeQL database.

    All scratch files (query, qlpack, bqrs and CSV output) live inside a
    private temporary directory that is created on ``__enter__`` and removed
    on ``__exit__``. Keeping them in a directory owned by this instance means
    that several runners can be active at the same time without overwriting
    each other's ``qlpack.yml``.

    Parameters
    ----------
    database_path : str
        The path to the CodeQL database.
    library_pack : str, optional
        The CodeQL library pack written to ``libraryPathDependencies`` in the
        temporary ``qlpack.yml``. Defaults to ``"codeql/java-all"``.
        NOTE(review): this package analyses Python sources, so callers
        probably want ``"codeql/python-all"`` -- confirm before changing the
        default.

    Attributes
    ----------
    database_path : Path
        The path to the CodeQL database.
    library_pack : str
        The library pack the temporary qlpack depends on.
    temp_file_path : Path or None
        The path to the temporary query file (``None`` outside the context).
    csv_output_file : Path or None
        The path to the CSV output file (``None`` outside the context).
    temp_bqrs_file_path : Path or None
        The path to the temporary bqrs file (``None`` outside the context).
    temp_qlpack_file : Path or None
        The path to the temporary qlpack file (``None`` outside the context).

    Methods
    -------
    __enter__()
        Context entry that creates temporary files to execute a CodeQL query.
    execute(query_string, column_names)
        Writes the query to the temporary file and executes it against the specified CodeQL database.
    __exit__(exc_type, exc_val, exc_tb)
        Clean up resources used by the CodeQL analysis.

    Raises
    ------
    CodeQLQueryExecutionException
        If there is an error executing the query.
    """

    def __init__(self, database_path: str, library_pack: str = "codeql/java-all"):
        self.database_path: Path = Path(database_path)
        self.library_pack: str = library_pack

        # Every scratch path starts out as None so that `execute` can detect a
        # runner that was never entered and `__exit__` is always safe to call.
        self.temp_file_path: Path | None = None
        self.csv_output_file: Path | None = None
        self.temp_bqrs_file_path: Path | None = None
        self.temp_qlpack_file: Path | None = None
        self._temp_dir: tempfile.TemporaryDirectory | None = None

    def __enter__(self):
        """
        Context entry that creates temporary files to execute a CodeQL query.

        Returns
        -------
        instance : object
            The instance of the class.

        Notes
        -----
        A private temporary directory is created and the query, CSV, bqrs and
        qlpack files are created inside it; their paths are stored on the
        instance.
        """
        self._temp_dir = tempfile.TemporaryDirectory(prefix="codeql-")
        workdir = Path(self._temp_dir.name)

        self.temp_file_path = workdir / "query.ql"
        self.csv_output_file = workdir / "results.csv"
        self.temp_bqrs_file_path = workdir / "results.bqrs"
        # The qlpack sits next to the query file, as in the original layout.
        self.temp_qlpack_file = workdir / "qlpack.yml"

        # Create the files up front so the paths exist for the whole context.
        for scratch_file in (
            self.temp_file_path,
            self.csv_output_file,
            self.temp_bqrs_file_path,
        ):
            scratch_file.touch()

        self.temp_qlpack_file.write_text(
            "name: temp\n"
            "version: 1.0.0\n"
            f"libraryPathDependencies: {self.library_pack}\n"
        )

        return self

    def _run_codeql(self, arguments: List[str], failure_message: str) -> None:
        """Run ``codeql`` with *arguments*; raise on a non-zero exit status.

        The command is passed as an argument list (no shell, no string
        splitting) so paths containing spaces are handled correctly. Standard
        error is captured so that it can be included in the exception message.

        Raises
        ------
        CodeQLQueryExecutionException
            If the CodeQL CLI exits with a non-zero status.
        """
        completed = subprocess.run(
            ["codeql", *arguments],
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
            check=False,
        )
        if completed.returncode != 0:
            raise CodeQLQueryExecutionException(f"{failure_message}: {completed.stderr}")

    def execute(self, query_string: str, column_names: List[str]) -> DataFrame:
        """Writes the query to the temporary file and executes it against the specified CodeQL database.

        Args:
            query_string (str): The CodeQL query string to be executed.
            column_names (List[str]): The list of column names for the CSV the CodeQL produces when we execute the query.

        Returns:
            DataFrame: The query results, with ``column_names`` as the column labels.

        Raises:
            RuntimeError: If the context manager is not entered using the 'with' statement.
            CodeQLQueryExecutionException: If there is an error executing the query.
        """
        if self.temp_file_path is None:
            raise RuntimeError("Context manager not entered. Use 'with' statement.")

        # Write the query to the temp file so we can execute it.
        self.temp_file_path.write_text(query_string)

        # Run the query; the CodeQL CLI stores the results in a bqrs file.
        self._run_codeql(
            [
                "query",
                "run",
                str(self.temp_file_path),
                f"--database={self.database_path}",
                f"--output={self.temp_bqrs_file_path}",
            ],
            "Error executing query",
        )

        # Convert the bqrs file to a CSV file.
        self._run_codeql(
            [
                "bqrs",
                "decode",
                "--format=csv",
                f"--output={self.csv_output_file}",
                str(self.temp_bqrs_file_path),
            ],
            "Error decoding query results",
        )

        # Read the CSV file and cast it to a DataFrame. The first row is the
        # header emitted by CodeQL; it is skipped in favour of `column_names`.
        return pd.read_csv(
            self.csv_output_file,
            header=None,
            names=column_names,
            skiprows=[0],
        )

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Clean up resources used by the CodeQL analysis.

        Removes the private temporary directory together with everything in
        it: the query file, the bqrs file, the CSV output file and the qlpack
        file. The path attributes are reset to ``None`` so that a later
        ``execute`` call reports that the context is not active. Exceptions
        raised inside the ``with`` body are not suppressed.
        """
        if self._temp_dir is not None:
            self._temp_dir.cleanup()
            self._temp_dir = None

        self.temp_file_path = None
        self.csv_output_file = None
        self.temp_bqrs_file_path = None
        self.temp_qlpack_file = None

0 commit comments

Comments
 (0)