lint: replace black/isort/flake8 with ruff (#2992)

* lint: replace isort/flake8 with ruff

* update ruff links

* remove stale isort reference

* update CHANGELOG

* address review

* remove unused imports

* remove unnecessary list comprehension

* remove quotes from type annotation

* use dict.get instead of if-else block

* remove unnecessary utf-8 encoding declaration

* Revert "remove unused imports"

This reverts commit 18ba50a22b.

* skip check for unused imports

* fix UP036 Version block is outdated for minimum Python version

* add TODO comment for unused imports

* replace black with ruff

* address review comments
This commit is contained in:
Mike Hunhoff
2026-04-07 12:10:41 -06:00
committed by GitHub
parent ac1cba74b3
commit ed7e0cd77d
52 changed files with 896 additions and 781 deletions

View File

@@ -21,10 +21,9 @@
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.ruffPath": "/usr/local/py-utils/bin/ruff",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",

View File

@@ -194,10 +194,9 @@ sign a new one.
All Python code must adhere to the style guide used by capa:
1. [PEP8](https://www.python.org/dev/peps/pep-0008/), with clarifications from
2. [Willi's style guide](https://docs.google.com/document/d/1iRpeg-w4DtibwytUyC_dDT7IGhNGBP25-nQfuBa-Fyk/edit?usp=sharing), formatted with
3. [isort](https://pypi.org/project/isort/) (with line width 120 and ordered by line length), and formatted with
4. [black](https://github.com/psf/black) (with line width 120), and formatted with
5. [dos2unix](https://linux.die.net/man/1/dos2unix)
2. [Willi's style guide](https://docs.google.com/document/d/1iRpeg-w4DtibwytUyC_dDT7IGhNGBP25-nQfuBa-Fyk/edit?usp=sharing), and checked/formatted with
3. [ruff](https://docs.astral.sh/ruff/) (with line length 120), and
4. [dos2unix](https://linux.die.net/man/1/dos2unix)
Our CI pipeline will reformat and enforce the Python styleguide.

41
.github/flake8.ini vendored
View File

@@ -1,41 +0,0 @@
[flake8]
max-line-length = 120
extend-ignore =
# E203: whitespace before ':' (black does this)
E203,
# F401: `foo` imported but unused (prefer ruff)
F401,
# F811 Redefinition of unused `foo` (prefer ruff)
F811,
# E501 line too long (prefer black)
E501,
# E701 multiple statements on one line (colon) (prefer black, see https://github.com/psf/black/issues/4173)
E701,
# B010 Do not call setattr with a constant attribute value
B010,
# G200 Logging statement uses exception in arguments
G200,
# SIM102 Use a single if-statement instead of nested if-statements
# doesn't provide a space for commenting or logical separation of conditions
SIM102,
# SIM114 Use logical or and a single body
# makes logic trees too complex
SIM114,
# SIM117 Use 'with Foo, Bar:' instead of multiple with statements
# makes lines too long
SIM117
per-file-ignores =
# T201 print found.
#
# scripts are meant to print output
scripts/*: T201
# capa.exe is meant to print output
capa/main.py: T201
# utility used to find the Binary Ninja API via invoking python.exe
capa/features/extractors/binja/find_binja_api.py: T201
copyright-check = True
copyright-min-file-size = 1
copyright-regexp = Copyright \d{4} Google LLC

90
.github/ruff.toml vendored
View File

@@ -1,18 +1,6 @@
# Enable the pycodestyle (`E`) and Pyflakes (`F`) rules by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
lint.select = ["E", "F"]
# Allow autofix for all enabled rules (when `--fix`) is provided.
lint.fixable = ["ALL"]
lint.unfixable = []
# E402 module level import not at top of file
# E722 do not use bare 'except'
# E501 line too long
lint.ignore = ["E402", "E722", "E501"]
line-length = 120
preview = true # Required to enable pre-release copyright header checks (CPY001)
explicit-preview-rules = true
exclude = [
# Exclude a variety of commonly ignored directories.
@@ -39,5 +27,77 @@ exclude = [
"venv",
# protobuf generated files
"*_pb2.py",
"*_pb2.pyi"
"*_pb2.pyi",
"rules"
]
lint.select = [
"E", # pycodestyle (base style rules)
"F", # Pyflakes (logical/syntax errors)
"I", # isort (import sorting)
"B", # flake8-bugbear (common bugs/design problems)
"C4", # flake8-comprehensions (simplify list/dict comprehensions)
"ISC", # flake8-implicit-str-concat (detect accidental multi-line string issues)
"T20", # flake8-print (prevent leftover print/pprint statements)
"SIM", # flake8-simplify (code simplification upgrades)
"CPY", # flake8-copyright (header requirement enforcement)
"G", # flake8-logging-format (logging statement validation)
"TD", # flake8-todos (TODO formatting requirements)
"PTH", # flake8-use-pathlib (migration from os.path to Pathlib)
"UP", # pyupgrade (modern Python syntax upgrades)
"CPY001", # flake8-copyright; preview rule, so it must be selected by its exact code (explicit-preview-rules = true)
]
# Allow autofix for all enabled rules (when `--fix` is provided).
lint.fixable = ["ALL"]
lint.unfixable = []
# Map existing flake8 ignores to maintain strict parity
lint.ignore = [
# Legacy flake8 ignores
"E402", # Module level import not at top of file
"E722", # Do not use bare except
"E501", # Line too long
"E203", # Whitespace before ':'
"E701", # Multiple statements on one line
"B010", # Do not call setattr with a constant attribute value
"SIM102", # Use a single if statement instead of nested if statements
"SIM114", # Combine if branches using logical or operator
# Newly surfaced Ruff strictness ignores
"B905", # zip() without an explicit strict= parameter
"UP032", # Use f-string instead of format call
"UP031", # Use format specifiers instead of percent format
"SIM300", # Yoda condition detected (constant before variable)
"SIM108", # Use ternary operator instead of if-else block
"ISC003", # Explicitly concatenated string should be implicitly concatenated
"UP035", # Deprecated typing alias usage
"UP006", # Use type instead of Type for type annotation
"SIM115", # Use a context manager for opening files
"SIM118", # Use key not in dict instead of key not in dict.keys()
"UP024", # Replace aliased errors with OSError
"UP045", # Use X | None for optional type annotations
"SIM103", # Return negated condition directly
"UP007", # Use X | Y for union type annotations
"B904", # Raise exceptions within except clause using raise from
"UP028", # Replace yield over for loop with yield from
"C409", # Unnecessary list comprehension passed to tuple()
# TODO(mike-hunhoff): address circular dependencies
# https://github.com/mandiant/capa/issues/2996
"F401", # Unused imports
]
[lint.per-file-ignores]
# T201 (print found): scripts and entrypoints are meant to print output
"scripts/*" = ["T201"]
"capa/main.py" = ["T201"]
"capa/features/extractors/binja/find_binja_api.py" = ["T201"]
"tests/conftest.py" = ["I001"] # Suppress import sorting to preserve explicit legacy fixture loading order
"*_pb2.py" = ["ALL"] # Disable all lint rules for auto-generated protocol buffer files
[lint.flake8-copyright]
notice-rgx = "Copyright \\d{4} Google LLC"
min-file-size = 1
[lint.isort]
length-sort = true

View File

@@ -1,4 +1,4 @@
name: black auto-format
name: ruff auto-format
on:
pull_request:
@@ -13,7 +13,7 @@ permissions:
contents: write
jobs:
black-format:
ruff-format:
# only run on dependabot PRs or manual trigger
if: github.actor == 'dependabot[bot]' || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-22.04
@@ -35,13 +35,15 @@ jobs:
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Run isort
run: pre-commit run isort --all-files
- name: Run black/continue
# black returns non-zero error code after formatting, which is what we expect
- name: Run ruff check --fix/continue
# pre-commit exits non-zero when ruff modifies files or leaves unfixed violations, which is what we expect
continue-on-error: true
run: pre-commit run black --all-files
run: pre-commit run ruff --all-files
- name: Run ruff format/continue
# pre-commit exits non-zero when ruff format rewrites files, which is what we expect
continue-on-error: true
run: pre-commit run ruff-format --all-files
- name: Check for changes
id: changes
@@ -58,5 +60,5 @@ jobs:
git config user.name "${GITHUB_ACTOR}"
git config user.email "${GITHUB_ACTOR_ID}+${GITHUB_ACTOR}@users.noreply.github.com"
git add -A
git commit -m "style: auto-format with black and isort"
git commit -m "style: auto-format with ruff"
git push

View File

@@ -52,12 +52,8 @@ jobs:
pip install -e .[dev,scripts]
- name: Lint with ruff
run: pre-commit run ruff
- name: Lint with isort
run: pre-commit run isort --show-diff-on-failure
- name: Lint with black
run: pre-commit run black --show-diff-on-failure
- name: Lint with flake8
run: pre-commit run flake8 --hook-stage manual
- name: Check formatting with ruff
run: pre-commit run ruff-format --show-diff-on-failure
- name: Check types with mypy
run: pre-commit run mypy --hook-stage manual
- name: Check imports against dependencies

View File

@@ -1,15 +1,9 @@
@isort:
pre-commit run isort --show-diff-on-failure --all-files
@black:
pre-commit run black --show-diff-on-failure --all-files
@ruff-format:
pre-commit run ruff-format --show-diff-on-failure --all-files
@ruff:
pre-commit run ruff --all-files
@flake8:
pre-commit run flake8 --hook-stage manual --all-files
@mypy:
pre-commit run mypy --hook-stage manual --all-files
@@ -17,9 +11,7 @@
pre-commit run deptry --hook-stage manual --all-files
@lint:
-just isort
-just black
-just ruff
-just flake8
-just ruff-format
-just mypy
-just deptry

View File

@@ -6,53 +6,31 @@
# pre-commit install --hook-type pre-push
# pre-commit installed at .git/hooks/pre-push
#
# run all linters liks:
# run all linters like:
#
# pre-commit run --all-files
# isort....................................................................Passed
# black....................................................................Passed
# ruff-format..............................................................Passed
# ruff.....................................................................Passed
# flake8...................................................................Passed
# mypy.....................................................................Passed
#
# run a single linter like:
#
# pre-commit run --all-files isort
# isort....................................................................Passed
# pre-commit run --all-files ruff
# ruff.....................................................................Passed
repos:
- repo: local
hooks:
- id: isort
name: isort
stages: [pre-commit, pre-push, manual]
language: system
entry: isort
args:
- "--length-sort"
- "--profile"
- "black"
- "--line-length=120"
- "--skip-glob"
- "*_pb2.py"
- "capa/"
- "scripts/"
- "tests/"
- "web/rules/scripts/"
always_run: true
pass_filenames: false
- repo: local
hooks:
- id: black
name: black
- id: ruff-format
name: ruff format
stages: [pre-commit, pre-push, manual]
language: system
entry: black
entry: ruff
args:
- "--line-length=120"
- "--extend-exclude"
- ".*_pb2.py"
- "format"
- "--config"
- ".github/ruff.toml"
- "capa/"
- "scripts/"
- "tests/"
@@ -69,6 +47,7 @@ repos:
entry: ruff
args:
- "check"
- "--fix"
- "--config"
- ".github/ruff.toml"
- "capa/"
@@ -78,24 +57,6 @@ repos:
always_run: true
pass_filenames: false
- repo: local
hooks:
- id: flake8
name: flake8
stages: [pre-push, manual]
language: system
entry: flake8
args:
- "--config"
- ".github/flake8.ini"
- "--extend-exclude"
- "capa/render/proto/capa_pb2.py,capa/features/extractors/binexport2/binexport2_pb2.py"
- "capa/"
- "scripts/"
- "tests/"
- "web/rules/scripts/"
always_run: true
pass_filenames: false
- repo: local
hooks:

View File

@@ -17,6 +17,7 @@
### capa Explorer IDA Pro plugin
### Development
- replace black/isort/flake8 with ruff @mike-hunhoff #2992
### Raw diffs
- [capa v9.4.0...master](https://github.com/mandiant/capa/compare/v9.4.0...master)

View File

@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");

View File

@@ -216,7 +216,7 @@ class Some(Statement):
# because we've overridden `__bool__` above.
#
# we can't use `if child is True` because the instance is not True.
success = sum([1 for child in results if bool(child) is True]) >= self.count
success = sum(1 for child in results if bool(child) is True) >= self.count
return Result(success, self, results)

View File

@@ -170,12 +170,12 @@ def is_security_cookie(
basic_block_index: int = bbi.basic_block_index
bb: BinExport2.BasicBlock = be2.basic_block[basic_block_index]
if flow_graph.entry_basic_block_index == basic_block_index:
first_addr: int = min((idx.insn_address_by_index[ir.begin_index] for ir in bb.instruction_index))
first_addr: int = min(idx.insn_address_by_index[ir.begin_index] for ir in bb.instruction_index)
if instruction_address < first_addr + SECURITY_COOKIE_BYTES_DELTA:
return True
# or insn falls at the end before return in a terminal basic block.
if basic_block_index not in (e.source_basic_block_index for e in flow_graph.edge):
last_addr: int = max((idx.insn_address_by_index[ir.end_index - 1] for ir in bb.instruction_index))
last_addr: int = max(idx.insn_address_by_index[ir.end_index - 1] for ir in bb.instruction_index)
if instruction_address > last_addr - SECURITY_COOKIE_BYTES_DELTA:
return True
return False

View File

@@ -662,14 +662,10 @@ class BinExport2InstructionPatternMatcher:
@classmethod
def from_str(cls, patterns: str):
return cls(
[
return cls([
BinExport2InstructionPattern.from_str(line)
for line in filter(
lambda line: not line.startswith("#"), (line.strip() for line in patterns.split("\n"))
)
]
)
for line in filter(lambda line: not line.startswith("#"), (line.strip() for line in patterns.split("\n")))
])
def match(
self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]

View File

@@ -186,7 +186,6 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
"""
for sym in capa.features.extractors.ghidra.helpers.get_current_program().getSymbolTable().getAllSymbols(True):
# .isExternal() misses more than this config for the function symbols
if sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal():
name = sym.getName() # starts to resolve names based on Ghidra's FidDB

View File

@@ -26,14 +26,14 @@ from capa.features.extractors.base_extractor import FunctionHandle
def extract_function_calls_to(fh: FunctionHandle):
"""extract callers to a function"""
f: "ghidra.program.database.function.FunctionDB" = fh.inner
f: ghidra.program.database.function.FunctionDB = fh.inner
for ref in f.getSymbol().getReferences():
if ref.getReferenceType().isCall():
yield Characteristic("calls to"), AbsoluteVirtualAddress(ref.getFromAddress().getOffset())
def extract_function_loop(fh: FunctionHandle):
f: "ghidra.program.database.function.FunctionDB" = fh.inner
f: ghidra.program.database.function.FunctionDB = fh.inner
edges = []
for block in SimpleBlockIterator(
@@ -53,7 +53,7 @@ def extract_function_loop(fh: FunctionHandle):
def extract_recursive_call(fh: FunctionHandle):
f: "ghidra.program.database.function.FunctionDB" = fh.inner
f: ghidra.program.database.function.FunctionDB = fh.inner
for func in f.getCalledFunctions(capa.features.extractors.ghidra.helpers.get_monitor()):
if func.getEntryPoint().getOffset() == f.getEntryPoint().getOffset():

View File

@@ -26,14 +26,12 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
logger = logging.getLogger(__name__)
VOID_PTR_NUMBER_PARAMS = frozenset(
{
VOID_PTR_NUMBER_PARAMS = frozenset({
"hKey",
"hKeyRoot",
"hkResult",
"samDesired",
}
)
})
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:

View File

@@ -390,7 +390,7 @@ def is_cache_newer_than_rule_code(cache_dir: Path) -> bool:
return False
latest_cache_file = max(cache_files, key=os.path.getmtime)
cache_timestamp = os.path.getmtime(latest_cache_file)
cache_timestamp = Path(latest_cache_file).stat().st_mtime
# these are the relevant rules code files that could conflict with using an outdated cache
# delayed import due to circular dependencies
@@ -398,7 +398,7 @@ def is_cache_newer_than_rule_code(cache_dir: Path) -> bool:
import capa.rules.cache
latest_rule_code_file = max([Path(capa.rules.__file__), Path(capa.rules.cache.__file__)], key=os.path.getmtime)
rule_code_timestamp = os.path.getmtime(latest_rule_code_file)
rule_code_timestamp = Path(latest_rule_code_file).stat().st_mtime
if rule_code_timestamp > cache_timestamp:

View File

@@ -47,7 +47,7 @@ class CapaExplorerDataItem:
"""initialize item"""
self.pred = parent
self._data = data
self._children: list["CapaExplorerDataItem"] = []
self._children: list[CapaExplorerDataItem] = []
self._checked = False
self._can_check = can_check

View File

@@ -785,8 +785,13 @@ class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget):
def get_features(self, selected=False, ignore=()):
""" """
for feature in filter(
lambda o: o.capa_type
in (CapaExplorerRulegenEditor.get_node_type_feature(), CapaExplorerRulegenEditor.get_node_type_comment()),
lambda o: (
o.capa_type
in (
CapaExplorerRulegenEditor.get_node_type_feature(),
CapaExplorerRulegenEditor.get_node_type_comment(),
)
),
tuple(iterate_tree(self)),
):
if feature in ignore:

View File

@@ -77,7 +77,6 @@ from capa.exceptions import (
UnsupportedOSError,
UnsupportedArchError,
UnsupportedFormatError,
UnsupportedRuntimeError,
)
from capa.features.common import (
OS_AUTO,
@@ -938,9 +937,6 @@ def apply_extractor_filters(extractor: FeatureExtractor, extractor_filters: Filt
def main(argv: Optional[list[str]] = None):
if sys.version_info < (3, 10):
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
if argv is None:
argv = sys.argv[1:]

View File

@@ -691,30 +691,26 @@ def static_analysis_from_pb2(analysis: capa_pb2.StaticAnalysis) -> rd.StaticAnal
rules=tuple(analysis.rules),
base_address=addr_from_pb2(analysis.base_address),
layout=rd.StaticLayout(
functions=tuple(
[
functions=tuple([
rd.FunctionLayout(
address=addr_from_pb2(f.address),
matched_basic_blocks=tuple(
[rd.BasicBlockLayout(address=addr_from_pb2(bb.address)) for bb in f.matched_basic_blocks]
),
matched_basic_blocks=tuple([
rd.BasicBlockLayout(address=addr_from_pb2(bb.address)) for bb in f.matched_basic_blocks
]),
)
for f in analysis.layout.functions
]
)
])
),
feature_counts=rd.StaticFeatureCounts(
file=analysis.feature_counts.file,
functions=tuple(
[
functions=tuple([
rd.FunctionFeatureCount(address=addr_from_pb2(f.address), count=f.count)
for f in analysis.feature_counts.functions
]
),
),
library_functions=tuple(
[rd.LibraryFunction(address=addr_from_pb2(lf.address), name=lf.name) for lf in analysis.library_functions]
]),
),
library_functions=tuple([
rd.LibraryFunction(address=addr_from_pb2(lf.address), name=lf.name) for lf in analysis.library_functions
]),
)
@@ -726,38 +722,29 @@ def dynamic_analysis_from_pb2(analysis: capa_pb2.DynamicAnalysis) -> rd.DynamicA
extractor=analysis.extractor,
rules=tuple(analysis.rules),
layout=rd.DynamicLayout(
processes=tuple(
[
processes=tuple([
rd.ProcessLayout(
address=addr_from_pb2(p.address),
name=p.name,
matched_threads=tuple(
[
matched_threads=tuple([
rd.ThreadLayout(
address=addr_from_pb2(t.address),
matched_calls=tuple(
[
rd.CallLayout(address=addr_from_pb2(c.address), name=c.name)
for c in t.matched_calls
]
),
matched_calls=tuple([
rd.CallLayout(address=addr_from_pb2(c.address), name=c.name) for c in t.matched_calls
]),
)
for t in p.matched_threads
]
),
]),
)
for p in analysis.layout.processes
]
)
])
),
feature_counts=rd.DynamicFeatureCounts(
file=analysis.feature_counts.file,
processes=tuple(
[
processes=tuple([
rd.ProcessFeatureCount(address=addr_from_pb2(p.address), count=p.count)
for p in analysis.feature_counts.processes
]
),
]),
),
)

View File

@@ -393,7 +393,6 @@ class Match(FrozenModel):
)
for location in result.locations:
# keep this in sync with the copy below
if isinstance(location, DynamicCallAddress):
if location in rule_matches:
@@ -409,15 +408,13 @@ class Match(FrozenModel):
#
# Despite the edge cases (like API hammering), this turns out to be pretty easy:
# collect the most recent match (with the given name) prior to the wanted location.
matches_in_thread = sorted(
[
matches_in_thread = sorted([
(a.id, m)
for a, m in rule_matches.items()
if isinstance(a, DynamicCallAddress)
and a.thread == location.thread
and a.id <= location.id
]
)
])
if matches_in_thread:
_, most_recent_match = matches_in_thread[-1]
children.append(Match.from_capa(rules, capabilities, most_recent_match))
@@ -470,15 +467,13 @@ class Match(FrozenModel):
if location in rule_matches:
children.append(Match.from_capa(rules, capabilities, rule_matches[location]))
else:
matches_in_thread = sorted(
[
matches_in_thread = sorted([
(a.id, m)
for a, m in rule_matches.items()
if isinstance(a, DynamicCallAddress)
and a.thread == location.thread
and a.id <= location.id
]
)
])
# namespace matches may not occur within the same thread as the result, so only
# proceed if a match within the same thread is found
if matches_in_thread:

View File

@@ -80,15 +80,13 @@ def capability_rules(doc: rd.ResultDocument) -> Iterator[rd.RuleMatches]:
def maec_rules(doc: rd.ResultDocument) -> Iterator[rd.RuleMatches]:
"""enumerate 'maec' rules."""
for rule in doc.rules.values():
if any(
[
if any([
rule.meta.maec.analysis_conclusion,
rule.meta.maec.analysis_conclusion_ov,
rule.meta.maec.malware_family,
rule.meta.maec.malware_category,
rule.meta.maec.malware_category_ov,
]
):
]):
yield rule

View File

@@ -424,20 +424,19 @@ def render_rules(console: Console, doc: rd.ResultDocument):
rows.append(("namespace", rule.meta.namespace))
if rule.meta.maec.analysis_conclusion or rule.meta.maec.analysis_conclusion_ov:
rows.append(
(
rows.append((
"maec/analysis-conclusion",
rule.meta.maec.analysis_conclusion or rule.meta.maec.analysis_conclusion_ov,
)
)
))
if rule.meta.maec.malware_family:
rows.append(("maec/malware-family", rule.meta.maec.malware_family))
if rule.meta.maec.malware_category or rule.meta.maec.malware_category_ov:
rows.append(
("maec/malware-category", rule.meta.maec.malware_category or rule.meta.maec.malware_category_ov)
)
rows.append((
"maec/malware-category",
rule.meta.maec.malware_category or rule.meta.maec.malware_category_ov,
))
rows.append(("author", ", ".join(rule.meta.authors)))

View File

@@ -98,10 +98,7 @@ Please install these dependencies before install capa (from source or from PyPI)
`$ pip install -r requirements.txt`
We use the following tools to ensure consistent code style and formatting:
- [black](https://github.com/psf/black) code formatter
- [isort](https://pypi.org/project/isort/) code formatter
- [ruff](https://beta.ruff.rs/docs/) code linter
- [flake8](https://flake8.pycqa.org/en/latest/) code linter
- [ruff](https://docs.astral.sh/ruff/) code linter and formatter
- [mypy](https://mypy-lang.org/) type checking
- [capafmt](https://github.com/mandiant/capa/blob/master/scripts/capafmt.py) rule formatter
@@ -115,17 +112,15 @@ We use [pre-commit](https://pre-commit.com/) so that its trivial to run the same
Run all linters like:
pre-commit run --hook-stage=manual --all-files
isort....................................................................Passed
black....................................................................Passed
ruff-format..............................................................Passed
ruff.....................................................................Passed
flake8...................................................................Passed
mypy.....................................................................Passed
pytest (fast)............................................................Passed
Or run a single linter like:
pre-commit run --all-files --hook-stage=manual isort
isort....................................................................Passed
pre-commit run --all-files --hook-stage=manual ruff
ruff.....................................................................Passed
Importantly, you can configure pre-commit to run automatically before every commit by running:

View File

@@ -133,20 +133,7 @@ dev = [
"pytest==9.0.2",
"pytest-sugar==1.1.1",
"pytest-instafail==0.5.0",
"flake8==7.3.0",
"flake8-bugbear==25.11.29",
"flake8-encodings==0.5.1",
"flake8-comprehensions==3.17.0",
"flake8-logging-format==0.9.0",
"flake8-no-implicit-concat==0.3.5",
"flake8-print==5.0.0",
"flake8-todos==0.3.1",
"flake8-simplify==0.30.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.15.0",
"black==26.3.0",
"isort==8.0.0",
"mypy==1.20.0",
"mypy-protobuf==5.0.0",
"PyGithub==2.9.0",
@@ -166,7 +153,7 @@ build = [
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pyinstaller==6.19.0",
"setuptools==80.10.1",
"setuptools==82.0.1",
"build==1.4.0"
]
scripts = [
@@ -218,22 +205,10 @@ known_first_party = [
[tool.deptry.per_rule_ignores]
# dependencies defined but not used in the codebase
DEP002 = [
"black",
"build",
"bump-my-version",
"deptry",
"flake8",
"flake8-bugbear",
"flake8-comprehensions",
"flake8-copyright",
"flake8-encodings",
"flake8-logging-format",
"flake8-no-implicit-concat",
"flake8-print",
"flake8-simplify",
"flake8-todos",
"flake8-use-pathlib",
"isort",
"mypy",
"mypy-protobuf",
"pre-commit",

View File

@@ -40,7 +40,7 @@ pyyaml==6.0.2
rich==14.3.2
ruamel-yaml==0.19.1
ruamel-yaml-clib==0.2.14
setuptools==80.10.1
setuptools==82.0.1
six==1.17.0
sortedcontainers==2.4.0
viv-utils==0.8.0

View File

@@ -161,21 +161,19 @@ def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]:
id = data_rules[key]["meta"]["name"]
# Append current rule
rules.append(
{
rules.append({
# Default to attack identifier, fall back to MBC, mainly relevant if both are present
"id": id,
"name": data_rules[key]["meta"]["name"],
"shortDescription": {"text": data_rules[key]["meta"]["name"]},
"messageStrings": {"default": {"text": data_rules[key]["meta"]["name"]}},
"properties": {
"namespace": data_rules[key]["meta"]["namespace"] if "namespace" in data_rules[key]["meta"] else [],
"namespace": data_rules[key]["meta"].get("namespace", []),
"scopes": data_rules[key]["meta"]["scopes"],
"references": data_rules[key]["meta"]["references"],
"lib": data_rules[key]["meta"]["lib"],
},
}
)
})
tool = Tool(
driver=ToolComponent(
@@ -284,13 +282,11 @@ def _enumerate_evidence(node: dict, related_count: int) -> list[dict]:
if loc["type"] != "absolute":
continue
related_locations.append(
{
related_locations.append({
"id": related_count,
"message": {"text": label},
"physicalLocation": {"address": {"absoluteAddress": loc["value"]}},
}
)
})
related_count += 1
if node.get("success") and node.get("node", {}).get("type") == "statement":

View File

@@ -593,8 +593,7 @@ class DuplicateFeatureUnderStatement(Lint):
def check_rule(self, ctx: Context, rule: Rule) -> bool:
self.violation = False
self.recommendation = ""
STATEMENTS = frozenset(
{
STATEMENTS = frozenset({
"or",
"and",
"not",
@@ -605,8 +604,7 @@ class DuplicateFeatureUnderStatement(Lint):
"instruction",
"call",
" or more",
}
)
})
# rule.statement discards the duplicate features by default so
# need to use the rule definition to check for duplicates
data = rule._get_ruamel_yaml_parser().load(rule.definition)
@@ -1099,7 +1097,7 @@ def lint_rule(ctx: Context, rule: Rule):
# and ends up just producing a lot of noise.
if not (is_nursery_rule(rule) and len(violations) == 1 and violations[0].name == "missing examples"):
print("")
print(f'{" (nursery) " if is_nursery_rule(rule) else ""} {rule.name}')
print(f"{' (nursery) ' if is_nursery_rule(rule) else ''} {rule.name}")
for violation in violations:
print(
@@ -1112,8 +1110,10 @@ def lint_rule(ctx: Context, rule: Rule):
lints_failed = len(
tuple(
filter(
lambda v: v.level == Lint.FAIL
and not (v.name == "missing examples" or v.name == "referenced example doesn't exist"),
lambda v: (
v.level == Lint.FAIL
and not (v.name == "missing examples" or v.name == "referenced example doesn't exist")
),
violations,
)
)
@@ -1121,8 +1121,9 @@ def lint_rule(ctx: Context, rule: Rule):
lints_warned = len(
tuple(
filter(
lambda v: v.level == Lint.WARN
or (v.level == Lint.FAIL and v.name == "referenced example doesn't exist"),
lambda v: (
v.level == Lint.WARN or (v.level == Lint.FAIL and v.name == "referenced example doesn't exist")
),
violations,
)
)
@@ -1130,7 +1131,7 @@ def lint_rule(ctx: Context, rule: Rule):
if (not lints_failed) and (not lints_warned) and has_examples:
print("")
print(f'{" (nursery) " if is_nursery_rule(rule) else ""} {rule.name}')
print(f"{' (nursery) ' if is_nursery_rule(rule) else ''} {rule.name}")
print(f" {Lint.WARN}: '[green]no lint failures[/green]': Graduate the rule")
print("")
else:

View File

@@ -21,13 +21,11 @@ tracemalloc.start()
def display_top(snapshot, key_type="lineno", limit=10):
# via: https://docs.python.org/3/library/tracemalloc.html#pretty-top
snapshot = snapshot.filter_traces(
(
snapshot = snapshot.filter_traces((
tracemalloc.Filter(False, "<frozen importlib._bootstrap_external>"),
tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
tracemalloc.Filter(False, "<unknown>"),
)
)
))
top_stats = snapshot.statistics(key_type)
print(f"Top {limit} lines")
@@ -47,7 +45,7 @@ def display_top(snapshot, key_type="lineno", limit=10):
def main():
# import within main to keep isort happy
# import within main to keep ruff happy
# while also invoking tracemalloc.start() immediately upon start.
import io
import os

View File

@@ -97,24 +97,20 @@ class MitreExtractor:
"""Get tactics IDs from Mitre matrix."""
# Only one matrix for enterprise att&ck framework
matrix = self._remove_deprecated_objects(
self._memory_store.query(
[
self._memory_store.query([
Filter("type", "=", "x-mitre-matrix"),
]
)
])
)[0]
return list(map(self._memory_store.get, matrix["tactic_refs"]))
def _get_techniques_from_tactic(self, tactic: str) -> list[AttackPattern]:
"""Get techniques and sub techniques from a Mitre tactic (kill_chain_phases->phase_name)"""
techniques = self._remove_deprecated_objects(
self._memory_store.query(
[
self._memory_store.query([
Filter("type", "=", "attack-pattern"),
Filter("kill_chain_phases.phase_name", "=", tactic),
Filter("kill_chain_phases.kill_chain_name", "=", self.kill_chain_name),
]
)
])
)
return techniques
@@ -122,12 +118,10 @@ class MitreExtractor:
"""Get parent technique of a sub technique using the technique ID TXXXX.YYY"""
sub_id = technique["external_references"][0]["external_id"].split(".")[0]
parent_technique = self._remove_deprecated_objects(
self._memory_store.query(
[
self._memory_store.query([
Filter("type", "=", "attack-pattern"),
Filter("external_references.external_id", "=", sub_id),
]
)
])
)[0]
return parent_technique

View File

@@ -95,7 +95,7 @@ logger = logging.getLogger("capa.show-features")
def format_address(addr: capa.features.address.Address) -> str:
return v.format_address(capa.features.freeze.Address.from_capa((addr)))
return v.format_address(capa.features.freeze.Address.from_capa(addr))
def main(argv=None):

View File

@@ -40,7 +40,7 @@ logger = logging.getLogger("show-unused-features")
def format_address(addr: capa.features.address.Address) -> str:
return v.format_address(capa.features.freeze.Address.from_capa((addr)))
return v.format_address(capa.features.freeze.Address.from_capa(addr))
def get_rules_feature_set(rules: capa.rules.RuleSet) -> set[Feature]:

View File

@@ -345,7 +345,7 @@ def extract_global_features(extractor):
return features
@lru_cache()
@lru_cache
def extract_file_features(extractor):
features = collections.defaultdict(set)
for feature, va in extractor.extract_file_features():

View File

@@ -458,7 +458,8 @@ def test_pattern_parsing():
capture="#int",
)
assert BinExport2InstructionPatternMatcher.from_str("""
assert (
BinExport2InstructionPatternMatcher.from_str("""
# comment
br reg
br reg(not-stack)
@@ -479,7 +480,9 @@ def test_pattern_parsing():
call [reg * #int + #int]
call [reg + reg + #int]
call [reg + #int]
""").queries is not None
""").queries
is not None
)
def match_address(extractor: BinExport2FeatureExtractor, queries: BinExport2InstructionPatternMatcher, address: int):

View File

@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,10 +19,10 @@ from capa.features.extractors.base_extractor import FunctionFilter
def test_match_across_scopes_file_function(z9324d_extractor):
rules = capa.rules.RuleSet(
[
rules = capa.rules.RuleSet([
# this rule should match on a function (0x4073F0)
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: install service
@@ -37,9 +36,11 @@ def test_match_across_scopes_file_function(z9324d_extractor):
- api: advapi32.OpenSCManagerA
- api: advapi32.CreateServiceA
- api: advapi32.StartServiceA
""")),
""")
),
# this rule should match on a file feature
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: .text section
@@ -50,11 +51,13 @@ def test_match_across_scopes_file_function(z9324d_extractor):
- 9324d1a8ae37a36ae560c37448c9705a
features:
- section: .text
""")),
""")
),
# this rule should match on earlier rule matches:
# - install service, with function scope
# - .text section, with file scope
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: .text section and install service
@@ -67,9 +70,9 @@ def test_match_across_scopes_file_function(z9324d_extractor):
- and:
- match: install service
- match: .text section
""")),
]
)
""")
),
])
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "install service" in capabilities.matches
assert ".text section" in capabilities.matches
@@ -77,10 +80,10 @@ def test_match_across_scopes_file_function(z9324d_extractor):
def test_match_across_scopes(z9324d_extractor):
rules = capa.rules.RuleSet(
[
rules = capa.rules.RuleSet([
# this rule should match on a basic block (including at least 0x403685)
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: tight loop
@@ -91,10 +94,12 @@ def test_match_across_scopes(z9324d_extractor):
- 9324d1a8ae37a36ae560c37448c9705a:0x403685
features:
- characteristic: tight loop
""")),
""")
),
# this rule should match on a function (0x403660)
# based on API, as well as prior basic block rule match
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: kill thread loop
@@ -108,9 +113,11 @@ def test_match_across_scopes(z9324d_extractor):
- api: kernel32.TerminateThread
- api: kernel32.CloseHandle
- match: tight loop
""")),
""")
),
# this rule should match on a file feature and a prior function rule match
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: kill thread program
@@ -123,9 +130,9 @@ def test_match_across_scopes(z9324d_extractor):
- and:
- section: .text
- match: kill thread loop
""")),
]
)
""")
),
])
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "tight loop" in capabilities.matches
assert "kill thread loop" in capabilities.matches
@@ -133,7 +140,9 @@ def test_match_across_scopes(z9324d_extractor):
def test_subscope_bb_rules(z9324d_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -144,14 +153,18 @@ def test_subscope_bb_rules(z9324d_extractor):
- and:
- basic block:
- characteristic: tight loop
"""))])
""")
)
])
# tight loop at 0x403685
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "test rule" in capabilities.matches
def test_match_specific_functions(z9324d_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: receive data
@@ -163,7 +176,9 @@ def test_match_specific_functions(z9324d_extractor):
features:
- or:
- api: recv
"""))])
""")
)
])
extractor = FunctionFilter(z9324d_extractor, {0x4019C0})
capabilities = capa.capabilities.common.find_capabilities(rules, extractor)
matches = capabilities.matches["receive data"]
@@ -174,7 +189,9 @@ def test_match_specific_functions(z9324d_extractor):
def test_byte_matching(z9324d_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: byte match test
@@ -184,13 +201,17 @@ def test_byte_matching(z9324d_extractor):
features:
- and:
- bytes: ED 24 9E F4 52 A9 07 47 55 8E E1 AB 30 8E 23 61
"""))])
""")
)
])
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "byte match test" in capabilities.matches
def test_com_feature_matching(z395eb_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: initialize IWebBrowser2
@@ -202,13 +223,17 @@ def test_com_feature_matching(z395eb_extractor):
- api: ole32.CoCreateInstance
- com/class: InternetExplorer #bytes: 01 DF 02 00 00 00 00 00 C0 00 00 00 00 00 00 46 = CLSID_InternetExplorer
- com/interface: IWebBrowser2 #bytes: 61 16 0C D3 AF CD D0 11 8A 3E 00 C0 4F C9 E2 6E = IID_IWebBrowser2
"""))])
""")
)
])
capabilities = capa.main.find_capabilities(rules, z395eb_extractor)
assert "initialize IWebBrowser2" in capabilities.matches
def test_count_bb(z9324d_extractor):
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: count bb
@@ -219,14 +244,18 @@ def test_count_bb(z9324d_extractor):
features:
- and:
- count(basic blocks): 1 or more
"""))])
""")
)
])
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "count bb" in capabilities.matches
def test_instruction_scope(z9324d_extractor):
# .text:004071A4 68 E8 03 00 00 push 3E8h
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: push 1000
@@ -238,7 +267,9 @@ def test_instruction_scope(z9324d_extractor):
- and:
- mnemonic: push
- number: 1000
"""))])
""")
)
])
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "push 1000" in capabilities.matches
assert 0x4071A4 in {result[0] for result in capabilities.matches["push 1000"]}
@@ -248,7 +279,9 @@ def test_instruction_subscope(z9324d_extractor):
# .text:00406F60 sub_406F60 proc near
# [...]
# .text:004071A4 68 E8 03 00 00 push 3E8h
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: push 1000 on i386
@@ -262,7 +295,9 @@ def test_instruction_subscope(z9324d_extractor):
- instruction:
- mnemonic: push
- number: 1000
"""))])
""")
)
])
capabilities = capa.capabilities.common.find_capabilities(rules, z9324d_extractor)
assert "push 1000 on i386" in capabilities.matches
assert 0x406F60 in {result[0] for result in capabilities.matches["push 1000 on i386"]}

View File

@@ -368,9 +368,9 @@ def test_dynamic_span_multiple_spans_overlapping_single_event():
def test_dynamic_span_scope_match_statements():
extractor = get_0000a657_thread3064()
ruleset = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
ruleset = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: resolve add VEH
@@ -383,8 +383,10 @@ def test_dynamic_span_scope_match_statements():
- api: LdrGetDllHandle
- api: LdrGetProcedureAddress
- string: AddVectoredExceptionHandler
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: resolve remove VEH
@@ -397,8 +399,10 @@ def test_dynamic_span_scope_match_statements():
- api: LdrGetDllHandle
- api: LdrGetProcedureAddress
- string: RemoveVectoredExceptionHandler
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: resolve add and remove VEH
@@ -409,8 +413,10 @@ def test_dynamic_span_scope_match_statements():
- and:
- match: resolve add VEH
- match: resolve remove VEH
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: has VEH runtime linking
@@ -420,9 +426,9 @@ def test_dynamic_span_scope_match_statements():
features:
- and:
- match: linking/runtime-linking/veh
""")),
]
)
""")
),
])
capabilities = capa.capabilities.dynamic.find_dynamic_capabilities(ruleset, extractor, disable_progress=True)

View File

@@ -59,25 +59,34 @@ def test_some():
assert bool(Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}})) is False
assert (
bool(
Some(2, [Number(1), Number(2), Number(3)]).evaluate(
{Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}
)
Some(2, [Number(1), Number(2), Number(3)]).evaluate({
Number(0): {ADDR1},
Number(1): {ADDR1},
Number(2): {ADDR1},
})
)
is True
)
assert (
bool(
Some(2, [Number(1), Number(2), Number(3)]).evaluate(
{Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}}
)
Some(2, [Number(1), Number(2), Number(3)]).evaluate({
Number(0): {ADDR1},
Number(1): {ADDR1},
Number(2): {ADDR1},
Number(3): {ADDR1},
})
)
is True
)
assert (
bool(
Some(2, [Number(1), Number(2), Number(3)]).evaluate(
{Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}, Number(4): {ADDR1}}
)
Some(2, [Number(1), Number(2), Number(3)]).evaluate({
Number(0): {ADDR1},
Number(1): {ADDR1},
Number(2): {ADDR1},
Number(3): {ADDR1},
Number(4): {ADDR1},
})
)
is True
)
@@ -85,15 +94,21 @@ def test_some():
def test_complex():
assert True is bool(
Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])]).evaluate(
{Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}
)
Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])]).evaluate({
Number(5): {ADDR1},
Number(6): {ADDR1},
Number(7): {ADDR1},
Number(8): {ADDR1},
})
)
assert False is bool(
Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate(
{Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}
)
Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate({
Number(5): {ADDR1},
Number(6): {ADDR1},
Number(7): {ADDR1},
Number(8): {ADDR1},
})
)

View File

@@ -106,9 +106,9 @@ def test_null_feature_extractor():
DynamicCallAddress(thread=ThreadAddress(ProcessAddress(pid=1), tid=1), id=2),
]
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: create file
@@ -118,9 +118,9 @@ def test_null_feature_extractor():
features:
- and:
- api: CreateFile
""")),
]
)
""")
),
])
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
assert "create file" in capabilities.matches

View File

@@ -86,9 +86,9 @@ def test_null_feature_extractor():
AbsoluteVirtualAddress(0x401002),
]
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: xor loop
@@ -100,9 +100,9 @@ def test_null_feature_extractor():
- characteristic: tight loop
- mnemonic: xor
- characteristic: nzxor
""")),
]
)
""")
),
])
capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
assert "xor loop" in capabilities.matches

View File

@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -54,14 +53,12 @@ def test_main_single_rule(z9324d_extractor, tmpdir):
rule_file = tmpdir.mkdir("capa").join("rule.yml")
rule_file.write(RULE_CONTENT)
assert (
capa.main.main(
[
capa.main.main([
path,
"-v",
"-r",
rule_file.strpath,
]
)
])
== 0
)
@@ -96,9 +93,9 @@ def test_main_shellcode(z499c2_extractor):
def test_ruleset():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: file rule
@@ -107,8 +104,10 @@ def test_ruleset():
dynamic: process
features:
- characteristic: embedded pe
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: function rule
@@ -117,8 +116,10 @@ def test_ruleset():
dynamic: process
features:
- characteristic: tight loop
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: basic block rule
@@ -127,8 +128,10 @@ def test_ruleset():
dynamic: process
features:
- characteristic: nzxor
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: process rule
@@ -137,8 +140,10 @@ def test_ruleset():
dynamic: process
features:
- string: "explorer.exe"
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: thread rule
@@ -147,8 +152,10 @@ def test_ruleset():
dynamic: thread
features:
- api: RegDeleteKey
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test call subscope
@@ -160,8 +167,10 @@ def test_ruleset():
- string: "explorer.exe"
- call:
- api: HttpOpenRequestW
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -179,9 +188,9 @@ def test_ruleset():
- number: 6 = IPPROTO_TCP
- number: 1 = SOCK_STREAM
- number: 2 = AF_INET
""")),
]
)
""")
),
])
assert len(rules.file_rules) == 2
assert len(rules.function_rules) == 2
assert len(rules.basic_block_rules) == 2
@@ -292,7 +301,8 @@ def test_main_cape1(tmp_path):
# https://github.com/mandiant/capa/pull/1696
rules = tmp_path / "rules"
rules.mkdir()
(rules / "create-or-open-registry-key.yml").write_text(textwrap.dedent("""
(rules / "create-or-open-registry-key.yml").write_text(
textwrap.dedent("""
rule:
meta:
name: create or open registry key
@@ -322,7 +332,8 @@ def test_main_cape1(tmp_path):
- api: SHRegOpenUSKey
- api: SHRegCreateUSKey
- api: RtlCreateRegistryKey
"""))
""")
)
assert capa.main.main([str(path), "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-q", "-r", str(rules)]) == 0

View File

@@ -208,7 +208,8 @@ def test_match_adds_matched_rule_feature():
def test_match_matched_rules():
"""show that using `match` adds a feature for matched rules."""
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule1
@@ -217,8 +218,10 @@ def test_match_matched_rules():
dynamic: process
features:
- number: 100
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule2
@@ -227,7 +230,8 @@ def test_match_matched_rules():
dynamic: process
features:
- match: test rule1
""")),
""")
),
]
features, _ = match(
@@ -251,7 +255,8 @@ def test_match_matched_rules():
def test_match_namespace():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: CreateFile API
@@ -261,8 +266,10 @@ def test_match_namespace():
namespace: file/create/CreateFile
features:
- api: CreateFile
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: WriteFile API
@@ -272,8 +279,10 @@ def test_match_namespace():
namespace: file/write
features:
- api: WriteFile
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: file-create
@@ -282,8 +291,10 @@ def test_match_namespace():
dynamic: process
features:
- match: file/create
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: filesystem-any
@@ -292,7 +303,8 @@ def test_match_namespace():
dynamic: process
features:
- match: file
""")),
""")
),
]
features, matches = match(
@@ -319,7 +331,8 @@ def test_match_namespace():
def test_match_substring():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -329,7 +342,8 @@ def test_match_substring():
features:
- and:
- substring: abc
""")),
""")
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -369,7 +383,8 @@ def test_match_substring():
def test_match_regex():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -379,8 +394,10 @@ def test_match_regex():
features:
- and:
- string: /.*bbbb.*/
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule with implied wildcards
@@ -390,8 +407,10 @@ def test_match_regex():
features:
- and:
- string: /bbbb/
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule with anchor
@@ -401,7 +420,8 @@ def test_match_regex():
features:
- and:
- string: /^bbbb/
""")),
""")
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -436,7 +456,8 @@ def test_match_regex():
def test_match_regex_ignorecase():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -446,7 +467,8 @@ def test_match_regex_ignorecase():
features:
- and:
- string: /.*bbbb.*/i
""")),
""")
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -458,7 +480,8 @@ def test_match_regex_ignorecase():
def test_match_regex_complex():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent(r"""
capa.rules.Rule.from_yaml(
textwrap.dedent(r"""
rule:
meta:
name: test rule
@@ -468,7 +491,8 @@ def test_match_regex_complex():
features:
- or:
- string: /.*HARDWARE\\Key\\key with spaces\\.*/i
""")),
""")
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
@@ -480,7 +504,8 @@ def test_match_regex_complex():
def test_match_regex_values_always_string():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -491,7 +516,8 @@ def test_match_regex_values_always_string():
- or:
- string: /123/
- string: /0x123/
""")),
""")
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),

View File

@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -110,8 +109,7 @@ def test_elf_parse_capa_pyinstaller_header():
# compressed ELF header of capa-v5.1.0-linux
# SHA256 e16974994914466647e24cdcfb6a6f8710297a4def21525e53f73c72c4b52fcf
elf_header = zlib.decompress(
b"".join(
[
b"".join([
b"\x78\x9c\x8d\x56\x4f\x88\x1c\xd5\x13\xae\x1d\x35\x0a\x7a\x58\x65",
b"\xd1\xa0\x9b\xb0\x82\x11\x14\x67\x63\xd6\xcd\x26\xf1\xf0\x63\x49",
b"\xdc\xc4\xc8\x26\x98\x7f\x07\x89\xa4\xed\xe9\x7e\x6f\xa6\x99\xd7",
@@ -180,7 +178,6 @@ def test_elf_parse_capa_pyinstaller_header():
b"\xda\x47\xab\xd2\xf4\xc8\x27\xed\x9f\xa4\x7d\x42\xab\x05\x38\xb6",
b"\x7c\xfc\xf0\x91\x68\x6e\x76\x6e\x76\xff\x68\x7d\x60\xb4\xda\x37",
b"\x3f\x5a\x3e\x35\x5a\x35\x30\x5c\xc3\x4d\x95\x6e\xa4\x60",
]
)
])
)
assert capa.features.extractors.elf.detect_elf_os(io.BytesIO(elf_header)) == "linux"

View File

@@ -57,7 +57,8 @@ def test_render_meta_attack():
subtechnique = "Windows Service"
canonical = "{:s}::{:s}::{:s} [{:s}]".format(tactic, technique, subtechnique, id)
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -70,7 +71,8 @@ def test_render_meta_attack():
- {:s}
features:
- number: 1
""".format(canonical))
""".format(canonical)
)
r = capa.rules.Rule.from_yaml(rule)
rule_meta = capa.render.result_document.RuleMetadata.from_capa(r)
attack = rule_meta.attack[0]
@@ -91,7 +93,8 @@ def test_render_meta_mbc():
method = "Heavens Gate"
canonical = "{:s}::{:s}::{:s} [{:s}]".format(objective, behavior, method, id)
rule = textwrap.dedent("""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -104,7 +107,8 @@ def test_render_meta_mbc():
- {:s}
features:
- number: 1
""".format(canonical))
""".format(canonical)
)
r = capa.rules.Rule.from_yaml(rule)
rule_meta = capa.render.result_document.RuleMetadata.from_capa(r)
mbc = rule_meta.mbc[0]
@@ -122,7 +126,8 @@ def test_render_meta_maec():
malware_category = "downloader"
analysis_conclusion = "malicious"
rule_yaml = textwrap.dedent("""
rule_yaml = textwrap.dedent(
"""
rule:
meta:
name: test rule
@@ -136,7 +141,8 @@ def test_render_meta_maec():
maec/analysis-conclusion: {:s}
features:
- number: 1
""".format(malware_family, malware_category, analysis_conclusion))
""".format(malware_family, malware_category, analysis_conclusion)
)
rule = capa.rules.Rule.from_yaml(rule_yaml)
rm = capa.render.result_document.RuleMatches(
meta=capa.render.result_document.RuleMetadata.from_capa(rule),

View File

@@ -22,7 +22,8 @@ import capa.rules
import capa.helpers
import capa.rules.cache
R1 = capa.rules.Rule.from_yaml(textwrap.dedent("""
R1 = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -38,9 +39,11 @@ R1 = capa.rules.Rule.from_yaml(textwrap.dedent("""
- and:
- number: 1
- number: 2
"""))
""")
)
R2 = capa.rules.Rule.from_yaml(textwrap.dedent("""
R2 = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule 2
@@ -56,7 +59,8 @@ R2 = capa.rules.Rule.from_yaml(textwrap.dedent("""
- and:
- number: 3
- number: 4
"""))
""")
)
def test_ruleset_cache_ids():

View File

@@ -149,7 +149,8 @@ def test_rule_descriptions():
def test_invalid_rule_statement_descriptions():
# statements can only have one description
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -161,7 +162,8 @@ def test_invalid_rule_statement_descriptions():
- number: 1 = This is the number 1
- description: description
- description: another description (invalid)
"""))
""")
)
def test_empty_yaml_raises_invalid_rule():
@@ -267,7 +269,8 @@ def test_rule_yaml_count_string():
def test_invalid_rule_feature():
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -276,10 +279,12 @@ def test_invalid_rule_feature():
dynamic: process
features:
- foo: true
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -288,10 +293,12 @@ def test_invalid_rule_feature():
dynamic: process
features:
- characteristic: nzxor
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -300,10 +307,12 @@ def test_invalid_rule_feature():
dynamic: thread
features:
- characteristic: embedded pe
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -312,11 +321,13 @@ def test_invalid_rule_feature():
dynamic: thread
features:
- characteristic: embedded pe
"""))
""")
)
def test_multi_scope_rules_features():
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -330,9 +341,11 @@ def test_multi_scope_rules_features():
- os: linux
- mnemonic: syscall
- number: 1 = write
"""))
""")
)
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -346,9 +359,11 @@ def test_multi_scope_rules_features():
- os: linux
- mnemonic: syscall
- number: 0 = read
"""))
""")
)
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -366,12 +381,14 @@ def test_multi_scope_rules_features():
- number: 6 = IPPROTO_TCP
- number: 1 = SOCK_STREAM
- number: 2 = AF_INET
"""))
""")
)
def test_rules_flavor_filtering():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: static rule
@@ -380,8 +397,10 @@ def test_rules_flavor_filtering():
dynamic: unsupported
features:
- api: CreateFileA
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: dynamic rule
@@ -390,7 +409,8 @@ def test_rules_flavor_filtering():
dynamic: thread
features:
- api: CreateFileA
""")),
""")
),
]
static_rules = capa.rules.RuleSet([r for r in rules if r.scopes.static is not None])
@@ -408,7 +428,8 @@ def test_meta_scope_keywords():
for static_scope in static_scopes:
for dynamic_scope in dynamic_scopes:
_ = capa.rules.Rule.from_yaml(textwrap.dedent(f"""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(f"""
rule:
meta:
name: test rule
@@ -418,11 +439,13 @@ def test_meta_scope_keywords():
features:
- or:
- format: pe
"""))
""")
)
# its also ok to specify "unsupported"
for static_scope in static_scopes:
_ = capa.rules.Rule.from_yaml(textwrap.dedent(f"""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(f"""
rule:
meta:
name: test rule
@@ -432,9 +455,11 @@ def test_meta_scope_keywords():
features:
- or:
- format: pe
"""))
""")
)
for dynamic_scope in dynamic_scopes:
_ = capa.rules.Rule.from_yaml(textwrap.dedent(f"""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(f"""
rule:
meta:
name: test rule
@@ -444,11 +469,13 @@ def test_meta_scope_keywords():
features:
- or:
- format: pe
"""))
""")
)
# but at least one scope must be specified
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -456,9 +483,11 @@ def test_meta_scope_keywords():
features:
- or:
- format: pe
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -468,20 +497,22 @@ def test_meta_scope_keywords():
features:
- or:
- format: pe
"""))
""")
)
def test_subscope_same_as_scope():
static_scopes = sorted(
[e.value for e in capa.rules.STATIC_SCOPES if e not in (capa.rules.Scope.FILE, capa.rules.Scope.GLOBAL)]
)
dynamic_scopes = sorted(
[e.value for e in capa.rules.DYNAMIC_SCOPES if e not in (capa.rules.Scope.FILE, capa.rules.Scope.GLOBAL)]
)
static_scopes = sorted([
e.value for e in capa.rules.STATIC_SCOPES if e not in (capa.rules.Scope.FILE, capa.rules.Scope.GLOBAL)
])
dynamic_scopes = sorted([
e.value for e in capa.rules.DYNAMIC_SCOPES if e not in (capa.rules.Scope.FILE, capa.rules.Scope.GLOBAL)
])
for static_scope in static_scopes:
for dynamic_scope in dynamic_scopes:
_ = capa.rules.Rule.from_yaml(textwrap.dedent(f"""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent(f"""
rule:
meta:
name: test rule
@@ -494,13 +525,14 @@ def test_subscope_same_as_scope():
- format: pe
- {dynamic_scope}:
- format: pe
"""))
""")
)
def test_lib_rules():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: a lib rule
@@ -510,8 +542,10 @@ def test_lib_rules():
lib: true
features:
- api: CreateFileA
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: a standard rule
@@ -521,17 +555,17 @@ def test_lib_rules():
lib: false
features:
- api: CreateFileW
""")),
]
)
""")
),
])
# lib rules are added to the rule set
assert len(rules.function_rules) == 2
def test_subscope_rules():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test function subscope
@@ -545,8 +579,10 @@ def test_subscope_rules():
- and:
- characteristic: nzxor
- characteristic: loop
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test process subscope
@@ -559,8 +595,10 @@ def test_subscope_rules():
- process:
- and:
- substring: "http://"
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test thread subscope
@@ -572,8 +610,10 @@ def test_subscope_rules():
- string: "explorer.exe"
- thread:
- api: HttpOpenRequestW
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test call subscope
@@ -585,9 +625,9 @@ def test_subscope_rules():
- string: "explorer.exe"
- call:
- api: HttpOpenRequestW
""")),
]
)
""")
),
])
# the file rule scope will have four rules:
# - `test function subscope`, `test process subscope` and
# `test thread subscope` for the static scope
@@ -614,9 +654,9 @@ def test_subscope_rules():
def test_duplicate_rules():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule-name
@@ -625,8 +665,10 @@ def test_duplicate_rules():
dynamic: process
features:
- api: CreateFileA
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule-name
@@ -635,16 +677,16 @@ def test_duplicate_rules():
dynamic: process
features:
- api: CreateFileW
""")),
]
)
""")
),
])
def test_missing_dependency():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: dependent rule
@@ -653,14 +695,15 @@ def test_missing_dependency():
dynamic: process
features:
- match: missing rule
""")),
]
)
""")
),
])
def test_invalid_rules():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -669,10 +712,12 @@ def test_invalid_rules():
dynamic: process
features:
- characteristic: number(1)
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -681,11 +726,13 @@ def test_invalid_rules():
dynamic: process
features:
- characteristic: count(number(100))
"""))
""")
)
# att&ck and mbc must be lists
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -695,9 +742,11 @@ def test_invalid_rules():
att&ck: Tactic::Technique::Subtechnique [Identifier]
features:
- number: 1
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -707,9 +756,11 @@ def test_invalid_rules():
mbc: Objective::Behavior::Method [Identifier]
features:
- number: 1
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -718,9 +769,11 @@ def test_invalid_rules():
behavior: process
features:
- number: 1
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -729,9 +782,11 @@ def test_invalid_rules():
dynamic: process
features:
- number: 1
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -740,9 +795,11 @@ def test_invalid_rules():
dynamic: process
features:
- number: 1
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -751,7 +808,8 @@ def test_invalid_rules():
dynamic: function
features:
- number: 1
"""))
""")
)
def test_number_symbol():
@@ -828,7 +886,8 @@ def test_count_api():
def test_invalid_number():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -837,10 +896,12 @@ def test_invalid_number():
dynamic: process
features:
- number: "this is a string"
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -849,10 +910,12 @@ def test_invalid_number():
dynamic: process
features:
- number: 2=
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -861,7 +924,8 @@ def test_invalid_number():
dynamic: process
features:
- number: symbol name = 2
"""))
""")
)
def test_offset_symbol():
@@ -913,7 +977,8 @@ def test_count_offset_symbol():
def test_invalid_offset():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -922,10 +987,12 @@ def test_invalid_offset():
dynamic: process
features:
- offset: "this is a string"
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -934,10 +1001,12 @@ def test_invalid_offset():
dynamic: process
features:
- offset: 2=
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -946,12 +1015,14 @@ def test_invalid_offset():
dynamic: process
features:
- offset: symbol name = 2
"""))
""")
)
def test_invalid_string_values_int():
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -960,10 +1031,12 @@ def test_invalid_string_values_int():
dynamic: process
features:
- string: 123
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -972,7 +1045,8 @@ def test_invalid_string_values_int():
dynamic: process
features:
- string: 0x123
"""))
""")
)
def test_explicit_string_values_int():
@@ -1054,9 +1128,9 @@ def test_substring_description():
def test_filter_rules():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 1
@@ -1067,8 +1141,10 @@ def test_filter_rules():
- joe
features:
- api: CreateFile
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 2
@@ -1077,18 +1153,18 @@ def test_filter_rules():
dynamic: process
features:
- string: joe
""")),
]
)
""")
),
])
rules = rules.filter_rules_by_meta("joe")
assert len(rules) == 1
assert "rule 1" in rules.rules
def test_filter_rules_dependencies():
rules = capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 1
@@ -1097,8 +1173,10 @@ def test_filter_rules_dependencies():
dynamic: process
features:
- match: rule 2
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 2
@@ -1107,8 +1185,10 @@ def test_filter_rules_dependencies():
dynamic: process
features:
- match: rule 3
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 3
@@ -1117,9 +1197,9 @@ def test_filter_rules_dependencies():
dynamic: process
features:
- api: CreateFile
""")),
]
)
""")
),
])
rules = rules.filter_rules_by_meta("rule 1")
assert len(rules.rules) == 3
assert "rule 1" in rules.rules
@@ -1129,9 +1209,9 @@ def test_filter_rules_dependencies():
def test_filter_rules_missing_dependency():
with pytest.raises(capa.rules.InvalidRule):
capa.rules.RuleSet(
[
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 1
@@ -1142,14 +1222,15 @@ def test_filter_rules_missing_dependency():
- joe
features:
- match: rule 2
""")),
]
)
""")
),
])
def test_rules_namespace_dependencies():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 1
@@ -1159,8 +1240,10 @@ def test_rules_namespace_dependencies():
namespace: ns1/nsA
features:
- api: CreateFile
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 2
@@ -1170,8 +1253,10 @@ def test_rules_namespace_dependencies():
namespace: ns1/nsB
features:
- api: CreateFile
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 3
@@ -1180,8 +1265,10 @@ def test_rules_namespace_dependencies():
dynamic: process
features:
- match: ns1/nsA
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: rule 4
@@ -1190,7 +1277,8 @@ def test_rules_namespace_dependencies():
dynamic: process
features:
- match: ns1
""")),
""")
),
]
r3 = {r.name for r in capa.rules.get_rules_and_dependencies(rules, "rule 3")}
@@ -1281,7 +1369,8 @@ def test_arch_features():
def test_property_access():
r = capa.rules.Rule.from_yaml(textwrap.dedent("""
r = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -1290,7 +1379,8 @@ def test_property_access():
dynamic: process
features:
- property/read: System.IO.FileInfo::Length
"""))
""")
)
assert bool(r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ): {ADDR1}})) is True
assert bool(r.evaluate({Property("System.IO.FileInfo::Length"): {ADDR1}})) is False
@@ -1298,7 +1388,8 @@ def test_property_access():
def test_property_access_symbol():
r = capa.rules.Rule.from_yaml(textwrap.dedent("""
r = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -1307,23 +1398,21 @@ def test_property_access_symbol():
dynamic: process
features:
- property/read: System.IO.FileInfo::Length = some property
"""))
""")
)
assert (
bool(
r.evaluate(
{
Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {
ADDR1
}
}
)
r.evaluate({
Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {ADDR1}
})
)
is True
)
def test_translate_com_features():
r = capa.rules.Rule.from_yaml(textwrap.dedent("""
r = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -1334,7 +1423,8 @@ def test_translate_com_features():
- com/class: WICPngDecoder
# 389ea17b-5078-4cde-b6ef-25c15175c751 WICPngDecoder
# e018945b-aa86-4008-9bd4-6777a1e40c11 WICPngDecoder
"""))
""")
)
com_name = "WICPngDecoder"
com_features = [
capa.features.common.Bytes(b"{\xa1\x9e8xP\xdeL\xb6\xef%\xc1Qu\xc7Q", f"CLSID_{com_name} as bytes"),
@@ -1348,39 +1438,46 @@ def test_translate_com_features():
def test_invalid_com_features():
# test for unknown COM class
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
features:
- com/class: invalid_com
"""))
""")
)
# test for unknown COM interface
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
features:
- com/interface: invalid_com
"""))
""")
)
# test for invalid COM type
# valid_com_types = "class", "interface"
with pytest.raises(capa.rules.InvalidRule):
_ = capa.rules.Rule.from_yaml(textwrap.dedent("""
_ = capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
features:
- com/invalid_COM_type: WICPngDecoder
"""))
""")
)
def test_circular_dependency():
rules = [
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule 1
@@ -1392,8 +1489,10 @@ def test_circular_dependency():
- or:
- match: test rule 2
- api: kernel32.VirtualAlloc
""")),
capa.rules.Rule.from_yaml(textwrap.dedent("""
""")
),
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule 2
@@ -1403,7 +1502,8 @@ def test_circular_dependency():
lib: true
features:
- match: test rule 1
""")),
""")
),
]
with pytest.raises(capa.rules.InvalidRule):
list(capa.rules.get_rules_and_dependencies(rules, rules[0].name))

View File

@@ -21,7 +21,8 @@ import capa.rules
def test_rule_scope_instruction():
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -33,10 +34,12 @@ def test_rule_scope_instruction():
- mnemonic: mov
- arch: i386
- os: windows
"""))
""")
)
with pytest.raises(capa.rules.InvalidRule):
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -45,11 +48,14 @@ def test_rule_scope_instruction():
dynamic: unsupported
features:
- characteristic: embedded pe
"""))
""")
)
def test_rule_subscope_instruction():
rules = capa.rules.RuleSet([capa.rules.Rule.from_yaml(textwrap.dedent("""
rules = capa.rules.RuleSet([
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -63,7 +69,9 @@ def test_rule_subscope_instruction():
- mnemonic: mov
- arch: i386
- os: windows
"""))])
""")
)
])
# the function rule scope will have one rule:
# - `test rule`
assert len(rules.function_rules) == 1
@@ -74,7 +82,8 @@ def test_rule_subscope_instruction():
def test_scope_instruction_implied_and():
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -87,11 +96,13 @@ def test_scope_instruction_implied_and():
- mnemonic: mov
- arch: i386
- os: windows
"""))
""")
)
def test_scope_instruction_description():
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -105,9 +116,11 @@ def test_scope_instruction_description():
- mnemonic: mov
- arch: i386
- os: windows
"""))
""")
)
capa.rules.Rule.from_yaml(textwrap.dedent("""
capa.rules.Rule.from_yaml(
textwrap.dedent("""
rule:
meta:
name: test rule
@@ -121,4 +134,5 @@ def test_scope_instruction_description():
- mnemonic: mov
- arch: i386
- os: windows
"""))
""")
)

View File

@@ -268,9 +268,9 @@ def generate_html(categories_data, color_map):
<div class="card">
<div class="thin-rectangle" style="background-color: {rectangle_color};"></div>
<div class="card-body">
<div class="namespace">{card['namespace']}</div>
<div class="rule-name"><a href="./{card['name']}/">{card['name']}</a></div>
<div class="authors">{', '.join(card['authors'])}</div>
<div class="namespace">{card["namespace"]}</div>
<div class="rule-name"><a href="./{card["name"]}/">{card["name"]}</a></div>
<div class="authors">{", ".join(card["authors"])}</div>
</div>
</div>
</div>"""

View File

@@ -127,7 +127,7 @@ def render_rule(timestamps, path: Path) -> str:
return html_content
yaml_files = glob(os.path.join(input_directory, "**/*.yml"), recursive=True)
yaml_files = [str(p) for p in input_directory.glob("**/*.yml")]
timestamps = {}
for line in txt_file_path.read_text(encoding="utf-8").splitlines():