diff --git a/src/fosslight_binary/_binary.py b/src/fosslight_binary/_binary.py index b45cd7a..073c1af 100755 --- a/src/fosslight_binary/_binary.py +++ b/src/fosslight_binary/_binary.py @@ -2,18 +2,15 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 LG Electronics Inc. # SPDX-License-Identifier: Apache-2.0 -import os import urllib.parse import logging import fosslight_util.constant as constant -from typing import Tuple from fosslight_util.oss_item import FileItem EXCLUDE_TRUE_VALUE = "Exclude" TLSH_CHECKSUM_NULL = "0" MAX_EXCEL_URL_LENGTH = 255 EXCEEDED_VUL_URL_LENGTH_COMMENT = f"Exceeded the maximum vulnerability URL length of {MAX_EXCEL_URL_LENGTH} characters." -_PACKAGE_DIR = ["node_modules", "venv", "Pods", "Carthage"] logger = logging.getLogger(constant.LOGGER_NAME) @@ -111,15 +108,3 @@ def get_print_json(self): if self.comment: json_item["comment"] = self.comment return items - - -def is_package_dir(bin_with_path: str, _root_path: str) -> Tuple[bool, str]: - is_pkg = False - pkg_path = "" - path_parts = bin_with_path.split(os.path.sep) - for pkg_dir in _PACKAGE_DIR: - if pkg_dir in path_parts: - pkg_index = path_parts.index(pkg_dir) - pkg_path = os.path.sep.join(path_parts[:pkg_index + 1]).replace(_root_path, '', 1) - is_pkg = True - return is_pkg, pkg_path diff --git a/src/fosslight_binary/_jar_analysis.py b/src/fosslight_binary/_jar_analysis.py index 588ef1a..2d89941 100644 --- a/src/fosslight_binary/_jar_analysis.py +++ b/src/fosslight_binary/_jar_analysis.py @@ -9,7 +9,7 @@ import subprocess from fosslight_binary import get_dependency_check_script import fosslight_util.constant as constant -from fosslight_binary._binary import BinaryItem, VulnerabilityItem, is_package_dir +from fosslight_binary._binary import BinaryItem, VulnerabilityItem from fosslight_util.oss_item import OssItem logger = logging.getLogger(constant.LOGGER_NAME) @@ -91,10 +91,6 @@ def merge_binary_list(owasp_items, vulnerability_items, bin_list): bin_item.binary_name_without_path = os.path.basename(key) bin_item.source_name_or_path = key - is_pkg, _ = is_package_dir(bin_item.source_name_or_path, '') - if is_pkg: - continue - bin_item.set_oss_items(oss_list) not_found_bin.append(bin_item) @@ -246,30 +242,15 @@ def analyze_jar_file(path_to_find_bin, path_to_exclude): # Even if the oss info is from pom.xml in jar file, the file name will be .jar file. # But the oss info from pom.xml could be different from .jar file. bin_with_path = val.get("filePath") - - if any(os.path.commonpath([bin_with_path, exclude_path]) == exclude_path - for exclude_path in path_to_exclude): + bin_with_path_rel = os.path.relpath(bin_with_path, path_to_find_bin) + # Check if bin_with_path should be excluded (compare relative paths) + if bin_with_path_rel in path_to_exclude: continue - if not bin_with_path.endswith('.jar'): - bin_with_path = bin_with_path.split('.jar')[0] + '.jar' - - try: - path_to_fild_bin_abs = os.path.abspath(path_to_find_bin) - bin_with_path_abs = os.path.abspath(bin_with_path) - if os.name == 'nt': # Windows - drive_bin = os.path.splitdrive(bin_with_path_abs)[0].lower() - drive_root = os.path.splitdrive(path_to_fild_bin_abs)[0].lower() - # Different drive or UNC root -> fallback to basename - if drive_bin and drive_root and drive_bin != drive_root: - file_with_path = os.path.basename(bin_with_path_abs) - else: - file_with_path = os.path.relpath(bin_with_path_abs, path_to_fild_bin_abs) - else: - file_with_path = os.path.relpath(bin_with_path_abs, path_to_fild_bin_abs) - except Exception as e: - file_with_path = os.path.basename(bin_with_path) - logger.error(f"relpath error: {e}; fallback basename: {file_with_path}") + if not bin_with_path_rel.endswith('.jar'): + bin_with_path_rel = bin_with_path_rel.split('.jar')[0] + '.jar' + + file_with_path = bin_with_path_rel # First, Get OSS Name and Version info from pkg_info for pkg_info in all_pkg_info: diff --git a/src/fosslight_binary/binary_analysis.py b/src/fosslight_binary/binary_analysis.py index ceb45e6..ccb73fe 100755 --- a/src/fosslight_binary/binary_analysis.py +++ b/src/fosslight_binary/binary_analysis.py @@ -16,7 +16,7 @@ import fosslight_util.constant as constant from fosslight_util.output_format import check_output_formats_v2, write_output_file from ._binary_dao import get_oss_info_from_db -from ._binary import BinaryItem, TLSH_CHECKSUM_NULL, is_package_dir +from ._binary import BinaryItem, TLSH_CHECKSUM_NULL from ._jar_analysis import analyze_jar_file, merge_binary_list from ._simple_mode import print_simple_mode, filter_binary, init_simple from fosslight_util.correct import correct_with_yaml @@ -131,19 +131,16 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]): return _result_log, combined_paths_and_files, output_extensions, formats -def get_file_list(path_to_find, abs_path_to_exclude): +def get_file_list(path_to_find, excluded_files): bin_list = [] file_cnt = 0 found_jar = False for root, dirs, files in os.walk(path_to_find): - if os.path.abspath(root) in abs_path_to_exclude: - continue for file in files: - file_path = os.path.join(root, file) - file_abs_path = os.path.abspath(file_path) - if any(os.path.commonpath([file_abs_path, exclude_path]) == exclude_path - for exclude_path in abs_path_to_exclude): + bin_with_path = os.path.join(root, file) + rel_path_file = os.path.relpath(bin_with_path, path_to_find).replace('\\', '/') + if rel_path_file in excluded_files: continue file_lower_case = file.lower() extension = os.path.splitext(file_lower_case)[1][1:].strip() @@ -155,26 +152,18 @@ def get_file_list(path_to_find, abs_path_to_exclude): dir_path = directory.replace(_root_path, '', 1).lower() dir_path = os.path.sep + dir_path + os.path.sep - bin_with_path = os.path.join(root, file) bin_item = BinaryItem(bin_with_path) bin_item.binary_name_without_path = file bin_item.source_name_or_path = bin_with_path.replace(_root_path, '', 1) - is_pkg, pkg_path = is_package_dir(bin_with_path, _root_path) - if is_pkg: - bin_item.source_name_or_path = pkg_path - if not any(x.source_name_or_path == bin_item.source_name_or_path for x in bin_list): - bin_item.exclude = True - bin_list.append(bin_item) - continue - bin_list.append(bin_item) file_cnt += 1 return file_cnt, bin_list, found_jar def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=False, - correct_mode=True, correct_filepath="", path_to_exclude=[]): + correct_mode=True, correct_filepath="", path_to_exclude=[], + all_exclude_mode=()): global start_time, _root_path, _result_log mode = "Normal Mode" @@ -200,10 +189,12 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F bin_list = [] scan_item = ScannerItem(PKG_NAME, "") - excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped \ - = get_excluded_paths(path_to_find_bin, path_to_exclude) - - abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_find_bin, path)) for path in excluded_files] + if all_exclude_mode and len(all_exclude_mode) == 4: + excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped = all_exclude_mode + else: + excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped \ + = get_excluded_paths(path_to_find_bin, path_to_exclude) + logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}") if not os.path.isdir(path_to_find_bin): error_occured(error_msg=f"(-p option) Can't find the directory: {path_to_find_bin}", @@ -213,7 +204,7 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F if not correct_filepath: correct_filepath = path_to_find_bin try: - total_file_cnt, file_list, found_jar = get_file_list(path_to_find_bin, abs_path_to_exclude) + total_file_cnt, file_list, found_jar = get_file_list(path_to_find_bin, excluded_files) return_list = list(return_bin_only(file_list)) except Exception as ex: error_occured(error_msg=f"Failed to check whether it is binary or not : {ex}", @@ -245,7 +236,7 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F logger.warning(f"Java version {java_ver} detected (<11). FOSSLight Binary Scanner requires Java 11+ to analyze .jar files.") else: logger.info("Run OWASP Dependency-check to analyze .jar file") - owasp_items, vulnerability_items, success = analyze_jar_file(path_to_find_bin, abs_path_to_exclude) + owasp_items, vulnerability_items, success = analyze_jar_file(path_to_find_bin, excluded_files) if success: return_list = merge_binary_list(owasp_items, vulnerability_items, return_list) else: @@ -263,8 +254,6 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F logger.info("Success to correct with yaml.") scan_item.set_cover_comment(f"Detected binaries: {len(return_list)} (Scanned Files : {cnt_file_except_skipped})") - if total_bin_cnt == 0: - scan_item.set_cover_comment("(No binary detected.) ") for combined_path_and_file, output_extension, output_format in zip(result_reports, output_extensions, formats): results.append(write_output_file(combined_path_and_file, output_extension, scan_item,