diff --git a/CHANGELOG.md b/CHANGELOG.md index 4eb7e83f..738ec669 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ ### Breaking Changes +- Remove redundant `os_` parameter from `capa.loader.collect_metadata()`. External scripts calling this function must be updated to remove this argument. OS metadata now relies solely on the extractor yielding it via `extract_global_features()`. @mike-hunhoff #3076 + ### New Rules (7) - nursery/decrypt-payload-stored-as-ipv6-strings corkami@google.com @@ -111,6 +113,7 @@ - fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin - fix: assign ConfigDict to model_config in ConciseModel so extra="ignore" is actually applied @williballenthin (SURF-42) - fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin (SURF-41) +- fix: remove redundant code related to cli loading @mike-hunhoff #3076 ### capa Explorer Web @@ -120,7 +123,6 @@ - tests: update binja version to 5.3 @mr-tz #3011 - ci: use explicit and per job permissions @mike-hunhoff #3002 - replace black/isort/flake8 with ruff @mike-hunhoff #2992 - - ci: update GitHub Actions to support Node.js 24 (deprecate Node.js 20) @mr-tz #2984 ### Raw diffs diff --git a/capa/loader.py b/capa/loader.py index f12ba45d..b0895b25 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -623,7 +623,6 @@ def collect_metadata( argv: list[str], input_path: Path, input_format: str, - os_: str, rules_path: list[Path], extractor: FeatureExtractor, capabilities: Capabilities, @@ -642,7 +641,7 @@ def collect_metadata( str(extractor_format[0]) if extractor_format else "unknown" if input_format == FORMAT_AUTO else input_format ) arch = str(extractor_arch[0]) if extractor_arch else "unknown" - os_ = str(extractor_os[0]) if extractor_os else "unknown" if os_ == OS_AUTO else os_ + os_ = str(extractor_os[0]) if extractor_os else "unknown" if isinstance(extractor, StaticFeatureExtractor): meta_class: type = rdoc.StaticMetadata diff --git a/capa/main.py b/capa/main.py index 8869195a..837974f5 100644 --- a/capa/main.py +++ b/capa/main.py @@ -756,7 +756,7 @@ def get_file_extractors_from_cli(args, input_format: str) -> list[FeatureExtract raise ShouldExitError(E_INVALID_FILE_TYPE) from e -def find_static_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool: +def find_static_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]): """ args: args: The parsed command line arguments from `install_common_args`. @@ -787,10 +787,9 @@ def find_static_limitations_from_cli(args, rules: RuleSet, file_extractors: list if not (args.verbose or args.vverbose or args.json): logger.debug("file limitation short circuit, won't analyze fully.") raise ShouldExitError(E_FILE_LIMITATION) - return found_file_limitation -def find_dynamic_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool: +def find_dynamic_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]): """ Does the dynamic analysis describe some trace that we may not support well? For example, .NET samples detonated in a sandbox, which may rely on different API patterns than we currently describe in our rules. @@ -812,7 +811,6 @@ def find_dynamic_limitations_from_cli(args, rules: RuleSet, file_extractors: lis if not (args.verbose or args.vverbose or args.json): logger.debug("file limitation short circuit, won't analyze fully.") raise ShouldExitError(E_FILE_LIMITATION) - return found_dynamic_limitation def get_signatures_from_cli(args, input_format: str, backend: str) -> list[Path]: @@ -1024,20 +1022,14 @@ def main(argv: Optional[list[str]] = None): try: rules: RuleSet = get_rules_from_cli(args) - found_limitation = False file_extractors = get_file_extractors_from_cli(args, input_format) if input_format in STATIC_FORMATS: # only static extractors have file limitations - found_limitation = find_static_limitations_from_cli(args, rules, file_extractors) + find_static_limitations_from_cli(args, rules, file_extractors) if input_format in DYNAMIC_FORMATS: - found_limitation = find_dynamic_limitations_from_cli(args, rules, file_extractors) + find_dynamic_limitations_from_cli(args, rules, file_extractors) backend = get_backend_from_cli(args, input_format) - sample_path = get_sample_path_from_cli(args, backend) - if sample_path is None: - os_ = "unknown" - else: - os_ = capa.loader.get_os(sample_path) extractor: FeatureExtractor = get_extractor_from_cli(args, input_format, backend) except ShouldExitError as e: return e.status_code @@ -1045,7 +1037,7 @@ def main(argv: Optional[list[str]] = None): capabilities: Capabilities = find_capabilities(rules, extractor, disable_progress=args.quiet) meta: rdoc.Metadata = capa.loader.collect_metadata( - argv, args.input_file, input_format, os_, args.rules, extractor, capabilities + argv, args.input_file, input_format, args.rules, extractor, capabilities ) layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) if isinstance(meta, rdoc.StaticMetadata): @@ -1055,13 +1047,6 @@ def main(argv: Optional[list[str]] = None): assert isinstance(layout, rdoc.DynamicLayout) meta.analysis.layout = layout - if found_limitation: - # bail if capa's static feature extractor encountered file limitation e.g. a packed binary - # or capa's dynamic feature extractor encountered some limitation e.g. a dotnet sample - # do show the output in verbose mode, though. - if not (args.verbose or args.vverbose or args.json): - return E_FILE_LIMITATION - if args.json: print(capa.render.json.render(meta, rules, capabilities.matches)) elif args.vverbose: diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 7c84d779..76c63f95 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -124,11 +124,6 @@ def get_capa_results(args): input_format = capa.main.get_input_format_from_cli(args) rules = capa.main.get_rules_from_cli(args) backend = capa.main.get_backend_from_cli(args, input_format) - sample_path = capa.main.get_sample_path_from_cli(args, backend) - if sample_path is None: - os_ = "unknown" - else: - os_ = capa.loader.get_os(sample_path) extractor = capa.main.get_extractor_from_cli(args, input_format, backend) except capa.main.ShouldExitError as e: # i'm not 100% sure if multiprocessing will reliably raise exceptions across process boundaries. @@ -146,7 +141,7 @@ def get_capa_results(args): capabilities = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True) - meta = capa.loader.collect_metadata(argv, args.input_file, format_, os_, [], extractor, capabilities) + meta = capa.loader.collect_metadata(argv, args.input_file, format_, [], extractor, capabilities) meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) doc = rd.ResultDocument.from_capa(meta, rules, capabilities.matches) diff --git a/scripts/capa-as-library.py b/scripts/capa-as-library.py index cbf2fb50..c6a78ec3 100644 --- a/scripts/capa-as-library.py +++ b/scripts/capa-as-library.py @@ -185,7 +185,7 @@ def capa_details(rules_path: Path, input_file: Path, output_format="dictionary") capabilities = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True) # collect metadata (used only to make rendering more complete) - meta = capa.loader.collect_metadata([], input_file, FORMAT_AUTO, OS_AUTO, [rules_path], extractor, capabilities) + meta = capa.loader.collect_metadata([], input_file, FORMAT_AUTO, [rules_path], extractor, capabilities) meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) capa_output: Any = False diff --git a/scripts/detect-binexport2-capabilities.py b/scripts/detect-binexport2-capabilities.py index 8ad29148..fe530989 100644 --- a/scripts/detect-binexport2-capabilities.py +++ b/scripts/detect-binexport2-capabilities.py @@ -68,10 +68,6 @@ def main(argv=None): backend = capa.main.get_backend_from_cli(args, input_format) assert backend == BACKEND_BINEXPORT2 - sample_path = capa.main.get_sample_path_from_cli(args, backend) - assert sample_path is not None - os_ = capa.loader.get_os(sample_path) - rules = capa.main.get_rules_from_cli(args) extractor = capa.main.get_extractor_from_cli(args, input_format, backend) @@ -96,7 +92,7 @@ def main(argv=None): capabilities = capa.capabilities.common.find_capabilities(rules, extractor) - meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, capabilities) + meta = capa.loader.collect_metadata(argv, args.input_file, input_format, args.rules, extractor, capabilities) meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) doc = rd.ResultDocument.from_capa(meta, rules, capabilities.matches) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 556a578c..c677c467 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -143,18 +143,13 @@ def main(argv=None): input_format = capa.main.get_input_format_from_cli(args) rules = capa.main.get_rules_from_cli(args) backend = capa.main.get_backend_from_cli(args, input_format) - sample_path = capa.main.get_sample_path_from_cli(args, backend) - if sample_path is None: - os_ = "unknown" - else: - os_ = capa.loader.get_os(sample_path) extractor = capa.main.get_extractor_from_cli(args, input_format, backend) except capa.main.ShouldExitError as e: return e.status_code capabilities = capa.capabilities.common.find_capabilities(rules, extractor) - meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, capabilities) + meta = capa.loader.collect_metadata(argv, args.input_file, input_format, args.rules, extractor, capabilities) meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches) if capa.capabilities.common.has_static_limitation(rules, capabilities):