|
| 1 | +################################################################################ |
| 2 | +# Copyright IBM Corporation 2024 |
| 3 | +# |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +# you may not use this file except in compliance with the License. |
| 6 | +# You may obtain a copy of the License at |
| 7 | +# |
| 8 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +# |
| 10 | +# Unless required by applicable law or agreed to in writing, software |
| 11 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +# See the License for the specific language governing permissions and |
| 14 | +# limitations under the License. |
| 15 | +################################################################################ |
| 16 | + |
| 17 | +""" |
| 18 | +CodeQL backend module: runs CodeQL queries against a CodeQL database through the CodeQL CLI. |
| 19 | +""" |
| 20 | + |
| 21 | +import subprocess |
| 22 | +import tempfile |
| 23 | +from pathlib import Path |
| 24 | +import shlex |
| 25 | +from typing import List |
| 26 | +import pandas as pd |
| 27 | +from pandas import DataFrame |
| 28 | + |
| 29 | +from cldk.utils.exceptions import CodeQLQueryExecutionException |
| 30 | + |
| 31 | + |
class CodeQLQueryRunner:
    """
    A class for executing CodeQL queries against a CodeQL database.

    The runner is a context manager: entering it creates the temporary files
    handed to the CodeQL CLI, and leaving it deletes them again.

    Parameters
    ----------
    database_path : str
        The path to the CodeQL database.

    Attributes
    ----------
    database_path : Path
        The path to the CodeQL database.
    temp_file_path : Path
        The path to the temporary query file (``None`` until the context is entered).
    csv_output_file : Path
        The path to the CSV output file (``None`` until the context is entered).
    temp_bqrs_file_path : Path
        The path to the temporary bqrs file (``None`` until the context is entered).
    temp_qlpack_file : Path
        The path to the temporary qlpack file (``None`` until the context is entered).

    Methods
    -------
    __enter__()
        Context entry that creates temporary files to execute a CodeQL query.
    execute(query_string, column_names)
        Writes the query to the temporary file and executes it against the specified CodeQL database.
    __exit__(exc_type, exc_val, exc_tb)
        Clean up resources used by the CodeQL analysis.

    Raises
    ------
    CodeQLQueryExecutionException
        If there is an error executing the query.
    """

    def __init__(self, database_path: str):
        self.database_path: Path = Path(database_path)
        # Every temporary path starts as None so that execute() can detect a
        # runner that was never entered and __exit__ can skip missing files.
        self.temp_file_path = None
        self.csv_output_file = None
        self.temp_bqrs_file_path = None
        self.temp_qlpack_file = None

    def __enter__(self):
        """
        Context entry that creates temporary files to execute a CodeQL query.

        Returns
        -------
        instance : object
            The instance of the class.

        Notes
        -----
        This method creates the temporary query, CSV and bqrs files plus a
        ``qlpack.yml`` next to the query file, and stores their paths.
        """
        # delete=False: the files must outlive these handles, because they are
        # reopened by path later (by execute() and by the CodeQL CLI).
        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".ql") as query_file:
            self.temp_file_path = Path(query_file.name)
        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".csv") as csv_file:
            self.csv_output_file = Path(csv_file.name)
        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".bqrs") as bqrs_file:
            self.temp_bqrs_file_path = Path(bqrs_file.name)

        # Create a temporary qlpack.yml file in the same directory as the query.
        # NOTE(review): the name is fixed and the directory is the shared temp
        # directory, so concurrent runners would share (and delete) this file.
        self.temp_qlpack_file = self.temp_file_path.parent / "qlpack.yml"
        self.temp_qlpack_file.write_text(
            "name: temp\n"
            "version: 1.0.0\n"
            "libraryPathDependencies: codeql/java-all\n"
        )

        return self

    @staticmethod
    def _run_codeql(command: List[str]) -> None:
        """Run one CodeQL CLI command and raise if it exits with a non-zero status.

        Args:
            command (List[str]): The full argument vector, starting with ``codeql``.

        Raises:
            CodeQLQueryExecutionException: If the command's return code is not 0.
        """
        # stderr is captured so a failure can be reported in the exception;
        # stdout is left inherited from the parent process.
        completed = subprocess.run(command, stderr=subprocess.PIPE, text=True, check=False)
        if completed.returncode != 0:
            raise CodeQLQueryExecutionException(f"Error executing query: {completed.stderr}")

    def execute(self, query_string: str, column_names: List[str]) -> DataFrame:
        """Writes the query to the temporary file and executes it against the specified CodeQL database.

        Args:
            query_string (str): The CodeQL query string to be executed.
            column_names (List[str]): The list of column names for the CSV the CodeQL produces when we execute the query.

        Returns:
            DataFrame: The query results, with ``column_names`` as the column labels.

        Raises:
            RuntimeError: If the context manager is not entered using the 'with' statement.
            CodeQLQueryExecutionException: If there is an error executing the query.
        """
        if not self.temp_file_path:
            raise RuntimeError("Context manager not entered. Use 'with' statement.")

        # Write the query to the temp file so we can execute it.
        self.temp_file_path.write_text(query_string)

        # Run the query; the CLI writes its results to the bqrs file.
        # The argument list is built directly (no string splitting) so paths
        # containing whitespace stay intact.
        self._run_codeql(
            [
                "codeql",
                "query",
                "run",
                str(self.temp_file_path),
                f"--database={self.database_path}",
                f"--output={self.temp_bqrs_file_path}",
            ]
        )

        # Convert the bqrs file to a CSV file.
        self._run_codeql(
            [
                "codeql",
                "bqrs",
                "decode",
                "--format=csv",
                f"--output={self.csv_output_file}",
                str(self.temp_bqrs_file_path),
            ]
        )

        # Read the CSV content into a DataFrame. The first row of the file is
        # skipped and the caller-supplied column names are used instead.
        return pd.read_csv(
            self.csv_output_file,
            header=None,
            names=column_names,
            skiprows=[0],
        )

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Clean up resources used by the CodeQL analysis.

        Deletes the temporary files created during the analysis: the query file,
        the CSV output file, the bqrs file, and the temporary QL pack file.
        Returns None, so an exception raised inside the ``with`` body propagates.
        """
        temporary_paths = (
            self.temp_file_path,
            self.csv_output_file,
            self.temp_bqrs_file_path,
            self.temp_qlpack_file,
        )
        for path in temporary_paths:
            if path is None:
                continue
            try:
                path.unlink()
            except FileNotFoundError:
                # Already gone; nothing left to clean up for this path.
                pass
0 commit comments