Compare commits

..

20 Commits

Author SHA1 Message Date
Willi Ballenthin
826377530d add Lancelot backend 2025-02-26 09:28:55 +00:00
Willi Ballenthin
9c90f0e554 binexport2: extract dll name from linked library 2025-02-26 09:28:55 +00:00
Willi Ballenthin
7431c67bbe binexport2: parse BinExport2 from raw bytes 2025-02-26 09:28:55 +00:00
Capa Bot
45ea683d19 Sync capa-testfiles submodule 2025-02-26 08:56:48 +00:00
Capa Bot
2b95fa089d Sync capa rules submodule 2025-02-25 15:59:41 +00:00
Mike Hunhoff
d3d71f97c8 vmray: only verify process OS and monitor ID match (#2613) 2025-02-24 14:14:05 -07:00
Willi Ballenthin
4c9d81072a main: don't require rules to render result document directly (#2611) 2025-02-24 17:47:00 +01:00
Capa Bot
a94c68377a Sync capa rules submodule 2025-02-22 19:41:30 +00:00
Capa Bot
14e076864c Sync capa-testfiles submodule 2025-02-22 19:13:14 +00:00
Capa Bot
6684f9f890 Sync capa rules submodule 2025-02-21 19:37:24 +00:00
dependabot[bot]
e622989eeb build(deps): bump psutil from 6.1.0 to 7.0.0 (#2605)
Bumps [psutil](https://github.com/giampaolo/psutil) from 6.1.0 to 7.0.0.
- [Changelog](https://github.com/giampaolo/psutil/blob/master/HISTORY.rst)
- [Commits](https://github.com/giampaolo/psutil/compare/release-6.1.0...release-7.0.0)

---
updated-dependencies:
- dependency-name: psutil
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mike Hunhoff <mike.hunhoff@gmail.com>
2025-02-21 10:26:04 -07:00
Capa Bot
9c9dd15bf9 Sync capa rules submodule 2025-02-21 16:29:46 +00:00
Capa Bot
06fad4a89e Sync capa-testfiles submodule 2025-02-21 12:17:50 +00:00
Capa Bot
e06a0ab75f Sync capa rules submodule 2025-02-21 12:16:25 +00:00
Capa Bot
0371ade358 Sync capa rules submodule 2025-02-20 22:18:12 +00:00
dependabot[bot]
80b5a116a5 build(deps): bump pygithub from 2.5.0 to 2.6.0 (#2604)
Bumps [pygithub](https://github.com/pygithub/pygithub) from 2.5.0 to 2.6.0.
- [Release notes](https://github.com/pygithub/pygithub/releases)
- [Changelog](https://github.com/PyGithub/PyGithub/blob/main/doc/changes.rst)
- [Commits](https://github.com/pygithub/pygithub/compare/v2.5.0...v2.6.0)

---
updated-dependencies:
- dependency-name: pygithub
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-02-20 12:50:10 -07:00
dependabot[bot]
9a270e6bdd build(deps): bump pyinstaller from 6.11.1 to 6.12.0 (#2602)
Bumps [pyinstaller](https://github.com/pyinstaller/pyinstaller) from 6.11.1 to 6.12.0.
- [Release notes](https://github.com/pyinstaller/pyinstaller/releases)
- [Changelog](https://github.com/pyinstaller/pyinstaller/blob/develop/doc/CHANGES.rst)
- [Commits](https://github.com/pyinstaller/pyinstaller/compare/v6.11.1...v6.12.0)

---
updated-dependencies:
- dependency-name: pyinstaller
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mike Hunhoff <mike.hunhoff@gmail.com>
2025-02-19 20:35:07 +01:00
dependabot[bot]
8773bc77ab build(deps): bump mypy from 1.14.1 to 1.15.0 (#2601)
Bumps [mypy](https://github.com/python/mypy) from 1.14.1 to 1.15.0.
- [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md)
- [Commits](https://github.com/python/mypy/compare/v1.14.1...v1.15.0)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mike Hunhoff <mike.hunhoff@gmail.com>
2025-02-19 20:34:51 +01:00
Mike Hunhoff
a278bf593a cape: models: parse minimum fields required for analysis (#2607)
* cape: models: parse minimum fields required for analysis

* update CHANGELOG
2025-02-19 08:55:12 -07:00
Capa Bot
f85cd80d90 Sync capa rules submodule 2025-02-11 09:25:04 +00:00
13 changed files with 267 additions and 168 deletions

View File

@@ -6,11 +6,17 @@
### Breaking Changes
### New Rules (0)
### New Rules (4)
- host-interaction/registry/change-registry-key-timestamp wballenthin@google.com
- host-interaction/mutex/check-mutex-and-terminate-process-on-windows @_re_fox moritz.raabe@mandiant.com mehunhoff@google.com
- anti-analysis/anti-forensic/clear-logs/clear-windows-event-logs-remotely 99.elad.levi@gmail.com
-
### Bug Fixes
- only parse CAPE fields required for analysis @mike-hunhoff #2607
- main: render result document without needing associated rules @williballenthin #2610
- vmray: only verify process OS and monitor IDs match @mike-hunhoff #2613
### capa Explorer Web

View File

@@ -40,12 +40,16 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
logger = logging.getLogger(__name__)
def get_binexport2(sample: Path) -> BinExport2:
def get_binexport2_from_bytes(buf: bytes) -> BinExport2:
be2: BinExport2 = BinExport2()
be2.ParseFromString(sample.read_bytes())
be2.ParseFromString(buf)
return be2
def get_binexport2(sample: Path) -> BinExport2:
return get_binexport2_from_bytes(sample.read_bytes())
def compute_common_prefix_length(m: str, n: str) -> int:
# ensure #m < #n
if len(n) < len(m):

View File

@@ -64,12 +64,17 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
):
continue
dll = ""
if vertex.HasField("library_index"):
library = be2.library[vertex.library_index]
dll = library.name
if not vertex.HasField("mangled_name"):
logger.debug("vertex %d does not have mangled_name", vertex_idx)
continue
api_name: str = vertex.mangled_name
for name in capa.features.extractors.helpers.generate_symbols("", api_name):
for name in capa.features.extractors.helpers.generate_symbols(dll, api_name):
yield API(name), ih.address

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
from typing import Any, Union, Optional, Annotated, TypeAlias
from pydantic import Field, BaseModel, ConfigDict
from pydantic.functional_validators import BeforeValidator
@@ -75,34 +75,37 @@ class Info(FlexibleModel):
version: str
class ImportedSymbol(ExactModel):
class ImportedSymbol(FlexibleModel):
address: HexInt
name: Optional[str] = None
class ImportedDll(ExactModel):
class ImportedDll(FlexibleModel):
dll: str
imports: list[ImportedSymbol]
class DirectoryEntry(ExactModel):
"""
class DirectoryEntry(FlexibleModel):
name: str
virtual_address: HexInt
size: HexInt
"""
class Section(ExactModel):
class Section(FlexibleModel):
name: str
raw_address: HexInt
# raw_address: HexInt
virtual_address: HexInt
virtual_size: HexInt
size_of_data: HexInt
characteristics: str
characteristics_raw: HexInt
entropy: float
# virtual_size: HexInt
# size_of_data: HexInt
# characteristics: str
# characteristics_raw: HexInt
# entropy: float
class Resource(ExactModel):
"""
class Resource(FlexibleModel):
name: str
language: Optional[str] = None
sublanguage: str
@@ -140,7 +143,7 @@ class DigitalSigner(FlexibleModel):
extensions_subjectKeyIdentifier: Optional[str] = None
class AuxSigner(ExactModel):
class AuxSigner(FlexibleModel):
name: str
issued_to: str = Field(alias="Issued to")
issued_by: str = Field(alias="Issued by")
@@ -148,7 +151,7 @@ class AuxSigner(ExactModel):
sha1_hash: str = Field(alias="SHA1 hash")
class Signer(ExactModel):
class Signer(FlexibleModel):
aux_sha1: Optional[str] = None
aux_timestamp: Optional[str] = None
aux_valid: Optional[bool] = None
@@ -157,60 +160,61 @@ class Signer(ExactModel):
aux_signers: Optional[list[AuxSigner]] = None
class Overlay(ExactModel):
class Overlay(FlexibleModel):
offset: HexInt
size: HexInt
class KV(ExactModel):
class KV(FlexibleModel):
name: str
value: str
"""
class ExportedSymbol(ExactModel):
class ExportedSymbol(FlexibleModel):
address: HexInt
name: str
ordinal: int
# ordinal: int
class PE(ExactModel):
peid_signatures: TODO
class PE(FlexibleModel):
# peid_signatures: TODO
imagebase: HexInt
entrypoint: HexInt
reported_checksum: HexInt
actual_checksum: HexInt
osversion: str
pdbpath: Optional[str] = None
timestamp: str
# entrypoint: HexInt
# reported_checksum: HexInt
# actual_checksum: HexInt
# osversion: str
# pdbpath: Optional[str] = None
# timestamp: str
# list[ImportedDll], or dict[basename(dll), ImportedDll]
imports: Union[list[ImportedDll], dict[str, ImportedDll]]
imported_dll_count: Optional[int] = None
imphash: str
# imported_dll_count: Optional[int] = None
# imphash: str
exported_dll_name: Optional[str] = None
# exported_dll_name: Optional[str] = None
exports: list[ExportedSymbol]
dirents: list[DirectoryEntry]
# dirents: list[DirectoryEntry]
sections: list[Section]
ep_bytes: Optional[HexBytes] = None
# ep_bytes: Optional[HexBytes] = None
overlay: Optional[Overlay] = None
resources: list[Resource]
versioninfo: list[KV]
# overlay: Optional[Overlay] = None
# resources: list[Resource]
# versioninfo: list[KV]
# base64 encoded data
icon: Optional[str] = None
# icon: Optional[str] = None
# MD5-like hash
icon_hash: Optional[str] = None
# icon_hash: Optional[str] = None
# MD5-like hash
icon_fuzzy: Optional[str] = None
# icon_fuzzy: Optional[str] = None
# short hex string
icon_dhash: Optional[str] = None
# icon_dhash: Optional[str] = None
digital_signers: list[DigitalSigner]
guest_signers: Signer
# digital_signers: list[DigitalSigner]
# guest_signers: Signer
# TODO(mr-tz): target.file.dotnet, target.file.extracted_files, target.file.extracted_files_tool,
@@ -218,48 +222,49 @@ class PE(ExactModel):
# https://github.com/mandiant/capa/issues/1814
class File(FlexibleModel):
type: str
cape_type_code: Optional[int] = None
cape_type: Optional[str] = None
# cape_type_code: Optional[int] = None
# cape_type: Optional[str] = None
pid: Optional[Union[int, Literal[""]]] = None
name: Union[list[str], str]
path: str
guest_paths: Union[list[str], str, None]
timestamp: Optional[str] = None
# pid: Optional[Union[int, Literal[""]]] = None
# name: Union[list[str], str]
# path: str
# guest_paths: Union[list[str], str, None]
# timestamp: Optional[str] = None
#
# hashes
#
crc32: str
# crc32: str
md5: str
sha1: str
sha256: str
sha512: str
sha3_384: Optional[str] = None
ssdeep: str
# sha512: str
# sha3_384: Optional[str] = None
# ssdeep: str
# unsure why this would ever be "False"
tlsh: Optional[Union[str, bool]] = None
rh_hash: Optional[str] = None
# tlsh: Optional[Union[str, bool]] = None
# rh_hash: Optional[str] = None
#
# other metadata, static analysis
#
size: int
# size: int
pe: Optional[PE] = None
ep_bytes: Optional[HexBytes] = None
entrypoint: Optional[int] = None
data: Optional[str] = None
strings: Optional[list[str]] = None
# ep_bytes: Optional[HexBytes] = None
# entrypoint: Optional[int] = None
# data: Optional[str] = None
# strings: Optional[list[str]] = None
#
# detections (skip)
#
yara: Skip = None
cape_yara: Skip = None
clamav: Skip = None
virustotal: Skip = None
# yara: Skip = None
# cape_yara: Skip = None
# clamav: Skip = None
# virustotal: Skip = None
"""
class ProcessFile(File):
#
# like a File, but also has dynamic analysis results
@@ -272,35 +277,36 @@ class ProcessFile(File):
target_pid: Optional[Union[int, str]] = None
target_path: Optional[str] = None
target_process: Optional[str] = None
"""
class Argument(ExactModel):
class Argument(FlexibleModel):
name: str
# unsure why empty list is provided here
value: Union[HexInt, int, str, EmptyList]
pretty_value: Optional[str] = None
class Call(ExactModel):
timestamp: str
class Call(FlexibleModel):
# timestamp: str
thread_id: int
category: str
# category: str
api: str
arguments: list[Argument]
status: bool
# status: bool
return_: HexInt = Field(alias="return")
pretty_return: Optional[str] = None
repeated: int
# repeated: int
# virtual addresses
caller: HexInt
parentcaller: HexInt
# caller: HexInt
# parentcaller: HexInt
# index into calls array
id: int
# id: int
# FlexibleModel to account for extended fields
@@ -310,14 +316,15 @@ class Process(FlexibleModel):
process_id: int
process_name: str
parent_id: int
module_path: str
first_seen: str
# module_path: str
# first_seen: str
calls: list[Call]
threads: list[int]
environ: dict[str, str]
class ProcessTree(ExactModel):
"""
class ProcessTree(FlexibleModel):
name: str
pid: int
parent_id: int
@@ -325,17 +332,18 @@ class ProcessTree(ExactModel):
threads: list[int]
environ: dict[str, str]
children: list["ProcessTree"]
"""
class Summary(ExactModel):
class Summary(FlexibleModel):
files: list[str]
read_files: list[str]
write_files: list[str]
delete_files: list[str]
# read_files: list[str]
# write_files: list[str]
# delete_files: list[str]
keys: list[str]
read_keys: list[str]
write_keys: list[str]
delete_keys: list[str]
# read_keys: list[str]
# write_keys: list[str]
# delete_keys: list[str]
executed_commands: list[str]
resolved_apis: list[str]
mutexes: list[str]
@@ -343,7 +351,8 @@ class Summary(ExactModel):
started_services: list[str]
class EncryptedBuffer(ExactModel):
"""
class EncryptedBuffer(FlexibleModel):
process_name: str
pid: int
@@ -351,38 +360,41 @@ class EncryptedBuffer(ExactModel):
buffer: str
buffer_size: Optional[int] = None
crypt_key: Optional[Union[HexInt, str]] = None
"""
class Behavior(ExactModel):
class Behavior(FlexibleModel):
summary: Summary
# list of processes, of threads, of calls
processes: list[Process]
# tree of processes
processtree: list[ProcessTree]
# processtree: list[ProcessTree]
anomaly: list[str]
encryptedbuffers: list[EncryptedBuffer]
# anomaly: list[str]
# encryptedbuffers: list[EncryptedBuffer]
# these are small objects that describe atomic events,
# like file move, registry access.
# we'll detect the same with our API call analysis.
enhanced: Skip = None
# enhanced: Skip = None
class Target(ExactModel):
category: str
class Target(FlexibleModel):
# category: str
file: File
# pe: Optional[PE] = None
class Static(FlexibleModel):
pe: Optional[PE] = None
# flare_capa: Skip = None
class Static(ExactModel):
pe: Optional[PE] = None
flare_capa: Skip = None
class Cape(ExactModel):
"""
class Cape(FlexibleModel):
payloads: list[ProcessFile]
configs: Skip = None
"""
# flexible because there may be more sorts of analysis
@@ -405,15 +417,14 @@ class CapeReport(FlexibleModel):
# post-processed results: process tree, anomalies, etc
behavior: Behavior
# post-processed results: payloads and extracted configs
CAPE: Optional[Union[Cape, list]] = None
dropped: Optional[list[File]] = None
procdump: Optional[list[ProcessFile]] = None
procmemory: Optional[ListTODO] = None
# =========================================================================
# information we won't use in capa
#
# post-processed results: payloads and extracted configs
# CAPE: Optional[Union[Cape, list]] = None
# dropped: Optional[list[File]] = None
# procdump: Optional[list[ProcessFile]] = None
# procmemory: Optional[ListTODO] = None
#
# NBIs and HBIs
@@ -422,32 +433,32 @@ class CapeReport(FlexibleModel):
#
# if we come up with a future use for this, go ahead and re-enable!
#
network: Skip = None
suricata: Skip = None
curtain: Skip = None
sysmon: Skip = None
url_analysis: Skip = None
# network: Skip = None
# suricata: Skip = None
# curtain: Skip = None
# sysmon: Skip = None
# url_analysis: Skip = None
# screenshot hash values
deduplicated_shots: Skip = None
# deduplicated_shots: Skip = None
# k-v pairs describing the time it took to run each stage.
statistics: Skip = None
# statistics: Skip = None
# k-v pairs of ATT&CK ID to signature name or similar.
ttps: Skip = None
# ttps: Skip = None
# debug log messages
debug: Skip = None
# debug: Skip = None
# various signature matches
# we could potentially extend capa to use this info one day,
# though it would be quite sandbox-specific,
# and more detection-oriented than capability detection.
signatures: Skip = None
malfamily_tag: Optional[str] = None
malscore: float
detections: Skip = None
detections2pid: Optional[dict[int, list[str]]] = None
# signatures: Skip = None
# malfamily_tag: Optional[str] = None
# malscore: float
# detections: Skip = None
# detections2pid: Optional[dict[int, list[str]]] = None
# AV detections for the sample.
virustotal: Skip = None
# virustotal: Skip = None
@classmethod
def from_buf(cls, buf: bytes) -> "CapeReport":

View File

@@ -223,16 +223,15 @@ class VMRayAnalysis:
# we expect monitor processes recorded in both SummaryV2.json and flog.xml to equal
# to ensure this, we compare the pid, monitor_id, and origin_monitor_id
# for the other fields we've observed cases with slight deviations, e.g.,
# the ppid for a process in flog.xml is not set correctly, all other data is equal
# the ppid, origin monitor id, etc. for a process in flog.xml is not set correctly, all other
# data is equal
sv2p = self.monitor_processes[monitor_process.process_id]
if self.monitor_processes[monitor_process.process_id] != vmray_monitor_process:
logger.debug("processes differ: %s (sv2) vs. %s (flog)", sv2p, vmray_monitor_process)
assert (sv2p.pid, sv2p.monitor_id, sv2p.origin_monitor_id) == (
vmray_monitor_process.pid,
vmray_monitor_process.monitor_id,
vmray_monitor_process.origin_monitor_id,
)
# we need, at a minimum, for the process id and monitor id to match, otherwise there is likely a bug
# in the way that VMRay tracked one of the processes
assert (sv2p.pid, sv2p.monitor_id) == (vmray_monitor_process.pid, vmray_monitor_process.monitor_id)
def _compute_monitor_threads(self):
for monitor_thread in self.flog.analysis.monitor_threads:

View File

@@ -79,6 +79,7 @@ BACKEND_VMRAY = "vmray"
BACKEND_FREEZE = "freeze"
BACKEND_BINEXPORT2 = "binexport2"
BACKEND_IDA = "ida"
BACKEND_LANCELOT = "lancelot"
class CorruptFile(ValueError):
@@ -351,6 +352,18 @@ def get_extractor(
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
elif backend == BACKEND_LANCELOT:
import lancelot
import capa.features.extractors.binexport2
import capa.features.extractors.binexport2.extractor
buf = input_path.read_bytes()
be2_buf: bytes = lancelot.binexport2_from_bytes(buf)
be2 = capa.features.extractors.binexport2.get_binexport2_from_bytes(be2_buf)
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
else:
raise ValueError("unexpected backend: " + backend)

View File

@@ -57,6 +57,7 @@ from capa.loader import (
BACKEND_FREEZE,
BACKEND_PEFILE,
BACKEND_DRAKVUF,
BACKEND_LANCELOT,
BACKEND_BINEXPORT2,
)
from capa.helpers import (
@@ -298,6 +299,7 @@ def install_common_args(parser, wanted=None):
(BACKEND_BINJA, "Binary Ninja"),
(BACKEND_DOTNET, ".NET"),
(BACKEND_BINEXPORT2, "BinExport2"),
(BACKEND_LANCELOT, "Lancelot"),
(BACKEND_FREEZE, "capa freeze"),
(BACKEND_CAPE, "CAPE"),
(BACKEND_DRAKVUF, "DRAKVUF"),
@@ -995,7 +997,27 @@ def main(argv: Optional[list[str]] = None):
handle_common_args(args)
ensure_input_exists_from_cli(args)
input_format = get_input_format_from_cli(args)
rules = get_rules_from_cli(args)
except ShouldExitError as e:
return e.status_code
if input_format == FORMAT_RESULT:
# render the result document immediately,
# no need to load the rules or do other processing.
result_doc = capa.render.result_document.ResultDocument.from_file(args.input_file)
if args.json:
print(result_doc.model_dump_json(exclude_none=True))
elif args.vverbose:
print(capa.render.vverbose.render_vverbose(result_doc))
elif args.verbose:
print(capa.render.verbose.render_verbose(result_doc))
else:
print(capa.render.default.render_default(result_doc))
return 0
try:
rules: RuleSet = get_rules_from_cli(args)
found_limitation = False
file_extractors = get_file_extractors_from_cli(args, input_format)
if input_format in STATIC_FORMATS:
@@ -1003,45 +1025,30 @@ def main(argv: Optional[list[str]] = None):
found_limitation = find_static_limitations_from_cli(args, rules, file_extractors)
if input_format in DYNAMIC_FORMATS:
found_limitation = find_dynamic_limitations_from_cli(args, rules, file_extractors)
backend = get_backend_from_cli(args, input_format)
sample_path = get_sample_path_from_cli(args, backend)
if sample_path is None:
os_ = "unknown"
else:
os_ = capa.loader.get_os(sample_path)
extractor: FeatureExtractor = get_extractor_from_cli(args, input_format, backend)
except ShouldExitError as e:
return e.status_code
meta: rdoc.Metadata
capabilities: Capabilities
capabilities: Capabilities = find_capabilities(rules, extractor, disable_progress=args.quiet)
if input_format == FORMAT_RESULT:
# result document directly parses into meta, capabilities
result_doc = capa.render.result_document.ResultDocument.from_file(args.input_file)
meta, capabilities = result_doc.to_capa()
meta: rdoc.Metadata = capa.loader.collect_metadata(
argv, args.input_file, input_format, os_, args.rules, extractor, capabilities
)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
else:
# all other formats we must create an extractor
# and use that to extract meta and capabilities
try:
backend = get_backend_from_cli(args, input_format)
sample_path = get_sample_path_from_cli(args, backend)
if sample_path is None:
os_ = "unknown"
else:
os_ = capa.loader.get_os(sample_path)
extractor = get_extractor_from_cli(args, input_format, backend)
except ShouldExitError as e:
return e.status_code
capabilities = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta = capa.loader.collect_metadata(
argv, args.input_file, input_format, os_, args.rules, extractor, capabilities
)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
if found_limitation:
# bail if capa's static feature extractor encountered file limitation e.g. a packed binary
# or capa's dynamic feature extractor encountered some limitation e.g. a dotnet sample
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
return E_FILE_LIMITATION
if found_limitation:
# bail if capa's static feature extractor encountered file limitation e.g. a packed binary
# or capa's dynamic feature extractor encountered some limitation e.g. a dotnet sample
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
return E_FILE_LIMITATION
if args.json:
print(capa.render.json.render(meta, rules, capabilities.matches))

View File

@@ -139,9 +139,9 @@ dev = [
"ruff==0.9.2",
"black==25.1.0",
"isort==6.0.0",
"mypy==1.14.1",
"mypy==1.15.0",
"mypy-protobuf==3.6.0",
"PyGithub==2.5.0",
"PyGithub==2.6.0",
# type stubs for mypy
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
@@ -156,13 +156,13 @@ build = [
# we want all developer environments to be consistent.
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pyinstaller==6.11.1",
"pyinstaller==6.12.0",
"setuptools==75.8.0",
"build==1.2.2"
]
scripts = [
"jschema_to_python==1.2.3",
"psutil==6.1.0",
"psutil==7.0.0",
"stix2==3.0.1",
"sarif_om==1.0.4",
"requests==2.32.3",

2
rules

Submodule rules updated: 79afc557f1...6221d9b72b

View File

@@ -36,7 +36,7 @@ import capa.main
logger = logging.getLogger("capa.compare-backends")
BACKENDS = ("vivisect", "ida", "binja")
BACKENDS = ("vivisect", "ida", "binja", "lancelot")
@dataclass
@@ -113,6 +113,9 @@ def collect(args):
file.unlink()
doc = json.loads(results_path.read_text(encoding="utf-8"))
for backend in BACKENDS:
if backend not in doc:
doc[backend] = {}
plan = []
for file in sorted(p for p in testfiles.glob("*")):
@@ -228,6 +231,7 @@ def report(args):
t.add_column("viv")
t.add_column("ida")
t.add_column("bn")
t.add_column("lan")
t.add_column("rule")
for rule, _ in seen_rules.most_common():
@@ -235,6 +239,7 @@ def report(args):
"x" if rule in rules_by_backend["vivisect"] else " ",
"x" if rule in rules_by_backend["ida"] else " ",
"x" if rule in rules_by_backend["binja"] else " ",
"x" if rule in rules_by_backend["lancelot"] else " ",
rule,
)

View File

@@ -134,6 +134,23 @@ def fixup_viv(path: Path, extractor):
extractor.vw.makeFunction(0x404970)
@lru_cache
def get_lancelot_extractor(path: Path):
import lancelot
import capa.features.extractors.binexport2
import capa.features.extractors.binexport2.extractor
buf = path.read_bytes()
be2_buf: bytes = lancelot.binexport2_from_bytes(buf)
be2 = capa.features.extractors.binexport2.get_binexport2_from_bytes(be2_buf)
extractor = capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
setattr(extractor, "path", path.as_posix())
return extractor
@lru_cache(maxsize=1)
def get_pefile_extractor(path: Path):
import capa.features.extractors.pefile

View File

@@ -0,0 +1,32 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import pytest
import fixtures
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
)
def test_lancelot_features(sample, scope, feature, expected):
if ".elf" in sample.name:
pytest.xfail("lancelot doesn't handle ELF files")
fixtures.do_test_feature_presence(fixtures.get_lancelot_extractor, sample, scope, feature, expected)
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_COUNT_TESTS,
indirect=["sample", "scope"],
)
def test_lancelot_feature_counts(sample, scope, feature, expected):
if ".elf" in sample.name:
pytest.xfail("lancelot doesn't handle ELF files")
fixtures.do_test_feature_count(fixtures.get_lancelot_extractor, sample, scope, feature, expected)