mirror of
https://github.com/mandiant/capa.git
synced 2026-02-04 11:07:53 -08:00
base_extractor(): add StaticFeatureExtractor and DynamicFeatureExtractor base classes, as well as a FeatureExtractor type alias
This commit is contained in:
@@ -63,16 +63,18 @@ class InsnHandle:
|
||||
inner: Any
|
||||
|
||||
|
||||
class FeatureExtractor:
|
||||
class StaticFeatureExtractor:
|
||||
"""
|
||||
FeatureExtractor defines the interface for fetching features from a sample.
|
||||
StaticFeatureExtractor defines the interface for fetching features from a
|
||||
sample without running it; extractors that rely on the execution trace of
|
||||
a sample must implement the other sibling class, DynamicFeatureExtractor.
|
||||
|
||||
There may be multiple backends that support fetching features for capa.
|
||||
For example, we use vivisect by default, but also want to support saving
|
||||
and restoring features from a JSON file.
|
||||
When we restore the features, we'd like to use exactly the same matching logic
|
||||
to find matching rules.
|
||||
Therefore, we can define a FeatureExtractor that provides features from the
|
||||
Therefore, we can define a StaticFeatureExtractor that provides features from the
|
||||
serialized JSON file and do matching without a binary analysis pass.
|
||||
Also, this provides a way to hook in an IDA backend.
|
||||
|
||||
@@ -292,9 +294,11 @@ class ThreadHandle:
|
||||
inner: Any
|
||||
|
||||
|
||||
class DynamicExtractor(FeatureExtractor):
|
||||
class DynamicFeatureExtractor:
|
||||
"""
|
||||
DynamicExtractor defines the interface for fetching features from a sandbox' analysis of a sample.
|
||||
DynamicFeatureExtractor defines the interface for fetching features from a
|
||||
sandbox's analysis of a sample; extractors that rely on statically analyzing
|
||||
a sample must implement the sibling extractor, StaticFeatureExtractor.
|
||||
|
||||
Features are grouped mainly into threads which, alongside their meta-features, are also grouped into
|
||||
processes (that also have their own features). Other scopes (such as function and file) may also apply
|
||||
@@ -336,3 +340,6 @@ class DynamicExtractor(FeatureExtractor):
|
||||
- network activity
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
FeatureExtractor = StaticFeatureExtractor | DynamicFeatureExtractor
|
||||
|
||||
@@ -17,10 +17,10 @@ import capa.features.extractors.binja.function
|
||||
import capa.features.extractors.binja.basicblock
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
|
||||
|
||||
|
||||
class BinjaFeatureExtractor(FeatureExtractor):
|
||||
class BinjaFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, bv: binja.BinaryView):
|
||||
super().__init__()
|
||||
self.bv = bv
|
||||
|
||||
@@ -14,12 +14,12 @@ import capa.features.extractors.cape.global_
|
||||
import capa.features.extractors.cape.process
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address
|
||||
from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle, DynamicExtractor
|
||||
from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle, DynamicFeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CapeExtractor(DynamicExtractor):
|
||||
class CapeExtractor(DynamicFeatureExtractor):
|
||||
def __init__(self, static: Dict, behavior: Dict):
|
||||
super().__init__()
|
||||
self.static = static
|
||||
|
||||
@@ -14,7 +14,7 @@ import capa.features.extractors.cape.global_
|
||||
import capa.features.extractors.cape.process
|
||||
from capa.features.common import String, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle, DynamicExtractor
|
||||
from capa.features.extractors.base_extractor import ThreadHandle, ProcessHandle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ import capa.features.extractors.dnfile.function
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress
|
||||
from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import (
|
||||
get_dotnet_types,
|
||||
get_dotnet_fields,
|
||||
@@ -67,7 +67,7 @@ class DnFileFeatureExtractorCache:
|
||||
return self.types.get(token, None)
|
||||
|
||||
|
||||
class DnfileFeatureExtractor(FeatureExtractor):
|
||||
class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
super().__init__()
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
|
||||
@@ -17,7 +17,7 @@ from capa.features.common import (
|
||||
Feature,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import StaticFeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -73,7 +73,7 @@ GLOBAL_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
class DnfileFeatureExtractor(FeatureExtractor):
|
||||
class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
super().__init__()
|
||||
self.path: str = path
|
||||
|
||||
@@ -23,7 +23,7 @@ from capa.features.common import (
|
||||
Characteristic,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import StaticFeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import (
|
||||
DnType,
|
||||
iter_dotnet_table,
|
||||
@@ -157,7 +157,7 @@ GLOBAL_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
class DotnetFileFeatureExtractor(FeatureExtractor):
|
||||
class DotnetFileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
super().__init__()
|
||||
self.path: str = path
|
||||
|
||||
@@ -15,7 +15,7 @@ import capa.features.extractors.common
|
||||
from capa.features.file import Import, Section
|
||||
from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature
|
||||
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import StaticFeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -106,7 +106,7 @@ GLOBAL_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
class ElfFeatureExtractor(FeatureExtractor):
|
||||
class ElfFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
super().__init__()
|
||||
self.path = path
|
||||
|
||||
@@ -18,10 +18,10 @@ import capa.features.extractors.ida.function
|
||||
import capa.features.extractors.ida.basicblock
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
|
||||
|
||||
|
||||
class IdaFeatureExtractor(FeatureExtractor):
|
||||
class IdaFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
|
||||
@@ -3,7 +3,7 @@ from dataclasses import dataclass
|
||||
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -24,7 +24,7 @@ class FunctionFeatures:
|
||||
|
||||
|
||||
@dataclass
|
||||
class NullFeatureExtractor(FeatureExtractor):
|
||||
class NullFeatureExtractor(StaticFeatureExtractor):
|
||||
"""
|
||||
An extractor that extracts some user-provided features.
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Characteristic
|
||||
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import StaticFeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -172,7 +172,7 @@ GLOBAL_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
class PefileFeatureExtractor(FeatureExtractor):
|
||||
class PefileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
super().__init__()
|
||||
self.path = path
|
||||
|
||||
@@ -19,12 +19,12 @@ import capa.features.extractors.viv.function
|
||||
import capa.features.extractors.viv.basicblock
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VivisectFeatureExtractor(FeatureExtractor):
|
||||
class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, vw, path, os):
|
||||
super().__init__()
|
||||
self.vw = vw
|
||||
|
||||
@@ -226,7 +226,7 @@ class Freeze(BaseModel):
|
||||
allow_population_by_field_name = True
|
||||
|
||||
|
||||
def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> str:
|
||||
def dumps(extractor: capa.features.extractors.base_extractor.StaticFeatureExtractor) -> str:
|
||||
"""
|
||||
serialize the given extractor to a string
|
||||
"""
|
||||
@@ -327,7 +327,7 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
|
||||
return freeze.json()
|
||||
|
||||
|
||||
def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
|
||||
def loads(s: str) -> capa.features.extractors.base_extractor.StaticFeatureExtractor:
|
||||
"""deserialize a set of features (as a NullFeatureExtractor) from a string."""
|
||||
import capa.features.extractors.null as null
|
||||
|
||||
@@ -363,7 +363,7 @@ def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
|
||||
MAGIC = "capa0000".encode("ascii")
|
||||
|
||||
|
||||
def dump(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> bytes:
|
||||
def dump(extractor: capa.features.extractors.base_extractor.StaticFeatureExtractor) -> bytes:
|
||||
"""serialize the given extractor to a byte array."""
|
||||
return MAGIC + zlib.compress(dumps(extractor).encode("utf-8"))
|
||||
|
||||
@@ -372,7 +372,7 @@ def is_freeze(buf: bytes) -> bool:
|
||||
return buf[: len(MAGIC)] == MAGIC
|
||||
|
||||
|
||||
def load(buf: bytes) -> capa.features.extractors.base_extractor.FeatureExtractor:
|
||||
def load(buf: bytes) -> capa.features.extractors.base_extractor.StaticFeatureExtractor:
|
||||
"""deserialize a set of features (as a NullFeatureExtractor) from a byte array."""
|
||||
if not is_freeze(buf):
|
||||
raise ValueError("missing magic header")
|
||||
|
||||
21
capa/main.py
21
capa/main.py
@@ -76,7 +76,14 @@ from capa.features.common import (
|
||||
FORMAT_RESULT,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.features.extractors.base_extractor import (
|
||||
BBHandle,
|
||||
InsnHandle,
|
||||
FunctionHandle,
|
||||
FeatureExtractor,
|
||||
StaticFeatureExtractor,
|
||||
DynamicFeatureExtractor,
|
||||
)
|
||||
|
||||
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
|
||||
SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
|
||||
@@ -117,7 +124,7 @@ def set_vivisect_log_level(level):
|
||||
|
||||
|
||||
def find_instruction_capabilities(
|
||||
ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||
) -> Tuple[FeatureSet, MatchResults]:
|
||||
"""
|
||||
find matches for the given rules for the given instruction.
|
||||
@@ -144,7 +151,7 @@ def find_instruction_capabilities(
|
||||
|
||||
|
||||
def find_basic_block_capabilities(
|
||||
ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
||||
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
||||
"""
|
||||
find matches for the given rules within the given basic block.
|
||||
@@ -184,7 +191,7 @@ def find_basic_block_capabilities(
|
||||
|
||||
|
||||
def find_code_capabilities(
|
||||
ruleset: RuleSet, extractor: FeatureExtractor, fh: FunctionHandle
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||
"""
|
||||
find matches for the given rules within the given function.
|
||||
@@ -242,7 +249,9 @@ def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, functi
|
||||
return matches, len(file_features)
|
||||
|
||||
|
||||
def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None) -> Tuple[MatchResults, Any]:
|
||||
def find_capabilities(
|
||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
|
||||
) -> Tuple[MatchResults, Any]:
|
||||
all_function_matches = collections.defaultdict(list) # type: MatchResults
|
||||
all_bb_matches = collections.defaultdict(list) # type: MatchResults
|
||||
all_insn_matches = collections.defaultdict(list) # type: MatchResults
|
||||
@@ -744,7 +753,7 @@ def collect_metadata(
|
||||
format_: str,
|
||||
os_: str,
|
||||
rules_path: List[str],
|
||||
extractor: capa.features.extractors.base_extractor.FeatureExtractor,
|
||||
extractor: FeatureExtractor,
|
||||
) -> rdoc.Metadata:
|
||||
md5 = hashlib.md5()
|
||||
sha1 = hashlib.sha1()
|
||||
|
||||
Reference in New Issue
Block a user