remove redundant code related to cli loading (#3076)

* remove unnecessary os_ argument from capa.loader.collect_metadata

* remove redundant limitation check in main.py

* update CHANGELOG

* Update CHANGELOG.md
This commit is contained in:
Mike Hunhoff
2026-05-15 15:20:46 -06:00
committed by GitHub
parent 7e06ba0ffe
commit bbe050437b
7 changed files with 13 additions and 41 deletions
+3 -1
View File
@@ -6,6 +6,8 @@
### Breaking Changes
- Remove redundant `os_` parameter from `capa.loader.collect_metadata()`. External scripts calling this function must be updated to remove this argument. The OS recorded in the metadata now comes solely from the extractor, which yields it via `extract_global_features()`; if the extractor yields no OS, it is recorded as "unknown". @mike-hunhoff #3076
### New Rules (7)
- nursery/decrypt-payload-stored-as-ipv6-strings corkami@google.com
@@ -111,6 +113,7 @@
- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin
- fix: assign ConfigDict to model_config in ConciseModel so extra="ignore" is actually applied @williballenthin (SURF-42)
- fix: replace assert with isinstance guard in get_callee for invalid MethodSpec tokens @williballenthin (SURF-41)
- fix: remove redundant CLI loading code (duplicate file limitation check in `main.py` and OS detection before calling `collect_metadata()`) @mike-hunhoff #3076
### capa Explorer Web
@@ -120,7 +123,6 @@
- tests: update binja version to 5.3 @mr-tz #3011
- ci: use explicit and per job permissions @mike-hunhoff #3002
- replace black/isort/flake8 with ruff @mike-hunhoff #2992
- ci: update GitHub Actions to support Node.js 24 (deprecate Node.js 20) @mr-tz #2984
### Raw diffs
+1 -2
View File
@@ -623,7 +623,6 @@ def collect_metadata(
argv: list[str],
input_path: Path,
input_format: str,
os_: str,
rules_path: list[Path],
extractor: FeatureExtractor,
capabilities: Capabilities,
@@ -642,7 +641,7 @@ def collect_metadata(
str(extractor_format[0]) if extractor_format else "unknown" if input_format == FORMAT_AUTO else input_format
)
arch = str(extractor_arch[0]) if extractor_arch else "unknown"
os_ = str(extractor_os[0]) if extractor_os else "unknown" if os_ == OS_AUTO else os_
os_ = str(extractor_os[0]) if extractor_os else "unknown"
if isinstance(extractor, StaticFeatureExtractor):
meta_class: type = rdoc.StaticMetadata
+5 -20
View File
@@ -756,7 +756,7 @@ def get_file_extractors_from_cli(args, input_format: str) -> list[FeatureExtract
raise ShouldExitError(E_INVALID_FILE_TYPE) from e
def find_static_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool:
def find_static_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]):
"""
args:
args: The parsed command line arguments from `install_common_args`.
@@ -787,10 +787,9 @@ def find_static_limitations_from_cli(args, rules: RuleSet, file_extractors: list
if not (args.verbose or args.vverbose or args.json):
logger.debug("file limitation short circuit, won't analyze fully.")
raise ShouldExitError(E_FILE_LIMITATION)
return found_file_limitation
def find_dynamic_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool:
def find_dynamic_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]):
"""
Does the dynamic analysis describe some trace that we may not support well?
For example, .NET samples detonated in a sandbox, which may rely on different API patterns than we currently describe in our rules.
@@ -812,7 +811,6 @@ def find_dynamic_limitations_from_cli(args, rules: RuleSet, file_extractors: lis
if not (args.verbose or args.vverbose or args.json):
logger.debug("file limitation short circuit, won't analyze fully.")
raise ShouldExitError(E_FILE_LIMITATION)
return found_dynamic_limitation
def get_signatures_from_cli(args, input_format: str, backend: str) -> list[Path]:
@@ -1024,20 +1022,14 @@ def main(argv: Optional[list[str]] = None):
try:
rules: RuleSet = get_rules_from_cli(args)
found_limitation = False
file_extractors = get_file_extractors_from_cli(args, input_format)
if input_format in STATIC_FORMATS:
# only static extractors have file limitations
found_limitation = find_static_limitations_from_cli(args, rules, file_extractors)
find_static_limitations_from_cli(args, rules, file_extractors)
if input_format in DYNAMIC_FORMATS:
found_limitation = find_dynamic_limitations_from_cli(args, rules, file_extractors)
find_dynamic_limitations_from_cli(args, rules, file_extractors)
backend = get_backend_from_cli(args, input_format)
sample_path = get_sample_path_from_cli(args, backend)
if sample_path is None:
os_ = "unknown"
else:
os_ = capa.loader.get_os(sample_path)
extractor: FeatureExtractor = get_extractor_from_cli(args, input_format, backend)
except ShouldExitError as e:
return e.status_code
@@ -1045,7 +1037,7 @@ def main(argv: Optional[list[str]] = None):
capabilities: Capabilities = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta: rdoc.Metadata = capa.loader.collect_metadata(
argv, args.input_file, input_format, os_, args.rules, extractor, capabilities
argv, args.input_file, input_format, args.rules, extractor, capabilities
)
layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
if isinstance(meta, rdoc.StaticMetadata):
@@ -1055,13 +1047,6 @@ def main(argv: Optional[list[str]] = None):
assert isinstance(layout, rdoc.DynamicLayout)
meta.analysis.layout = layout
if found_limitation:
# bail if capa's static feature extractor encountered file limitation e.g. a packed binary
# or capa's dynamic feature extractor encountered some limitation e.g. a dotnet sample
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
return E_FILE_LIMITATION
if args.json:
print(capa.render.json.render(meta, rules, capabilities.matches))
elif args.vverbose:
+1 -6
View File
@@ -124,11 +124,6 @@ def get_capa_results(args):
input_format = capa.main.get_input_format_from_cli(args)
rules = capa.main.get_rules_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
sample_path = capa.main.get_sample_path_from_cli(args, backend)
if sample_path is None:
os_ = "unknown"
else:
os_ = capa.loader.get_os(sample_path)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
# i'm not 100% sure if multiprocessing will reliably raise exceptions across process boundaries.
@@ -146,7 +141,7 @@ def get_capa_results(args):
capabilities = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
meta = capa.loader.collect_metadata(argv, args.input_file, format_, os_, [], extractor, capabilities)
meta = capa.loader.collect_metadata(argv, args.input_file, format_, [], extractor, capabilities)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
doc = rd.ResultDocument.from_capa(meta, rules, capabilities.matches)
+1 -1
View File
@@ -185,7 +185,7 @@ def capa_details(rules_path: Path, input_file: Path, output_format="dictionary")
capabilities = capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
# collect metadata (used only to make rendering more complete)
meta = capa.loader.collect_metadata([], input_file, FORMAT_AUTO, OS_AUTO, [rules_path], extractor, capabilities)
meta = capa.loader.collect_metadata([], input_file, FORMAT_AUTO, [rules_path], extractor, capabilities)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
capa_output: Any = False
+1 -5
View File
@@ -68,10 +68,6 @@ def main(argv=None):
backend = capa.main.get_backend_from_cli(args, input_format)
assert backend == BACKEND_BINEXPORT2
sample_path = capa.main.get_sample_path_from_cli(args, backend)
assert sample_path is not None
os_ = capa.loader.get_os(sample_path)
rules = capa.main.get_rules_from_cli(args)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
@@ -96,7 +92,7 @@ def main(argv=None):
capabilities = capa.capabilities.common.find_capabilities(rules, extractor)
meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, capabilities)
meta = capa.loader.collect_metadata(argv, args.input_file, input_format, args.rules, extractor, capabilities)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
doc = rd.ResultDocument.from_capa(meta, rules, capabilities.matches)
+1 -6
View File
@@ -143,18 +143,13 @@ def main(argv=None):
input_format = capa.main.get_input_format_from_cli(args)
rules = capa.main.get_rules_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
sample_path = capa.main.get_sample_path_from_cli(args, backend)
if sample_path is None:
os_ = "unknown"
else:
os_ = capa.loader.get_os(sample_path)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
return e.status_code
capabilities = capa.capabilities.common.find_capabilities(rules, extractor)
meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, capabilities)
meta = capa.loader.collect_metadata(argv, args.input_file, input_format, args.rules, extractor, capabilities)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
if capa.capabilities.common.has_static_limitation(rules, capabilities):